mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-23 08:07:29 +01:00
Compare commits
3 Commits
master
...
feature/52
| Author | SHA1 | Date | |
|---|---|---|---|
| 0c56591e4b | |||
|
0069c86e81
|
|||
|
c0d2d65f96
|
@@ -5,7 +5,6 @@ before:
|
||||
builds:
|
||||
- env:
|
||||
- CGO_ENABLED=1
|
||||
- CC=x86_64-linux-musl-gcc
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
|
||||
@@ -10,10 +10,7 @@ If you are upgrading from v1.5.0 you need to do another DB migration. This
|
||||
should not take long. For optimal database performance after the migration it is
|
||||
recommended to apply the new `optimize-db` flag, which runs the sqlite `ANALYZE`
|
||||
and `VACUUM` commands. Depending on your database size (more then 40GB) the
|
||||
`VACUUM` may take up to 2h. You can also run the `ANALYZE` command manually.
|
||||
While we are confident that the memory issue with the metricstore cleanup move
|
||||
policy is fixed, it is still recommended to use delete policy for cleanup.
|
||||
This is also the default.
|
||||
`VACUUM` may take up to 2h.
|
||||
|
||||
## Changes in 1.5.2
|
||||
|
||||
@@ -22,14 +19,6 @@ This is also the default.
|
||||
- **Memory spike in parquet writer**: Fixed memory spikes when using the
|
||||
metricstore move (archive) policy with the parquet writer. The writer now
|
||||
processes data in a streaming fashion to avoid accumulating large allocations.
|
||||
- **Top list query fixes**: Fixed top list queries in analysis and dashboard
|
||||
views.
|
||||
- **Exclude down nodes from HealthCheck**: Down nodes are now excluded from
|
||||
health checks in both the REST and NATS handlers.
|
||||
- **Node state priority order**: Node state determination now enforces a
|
||||
priority order. Exception: idle+down results in idle.
|
||||
- **Blocking ReceiveNats call**: Fixed a blocking NATS receive call in the
|
||||
metricstore.
|
||||
|
||||
### Database performance
|
||||
|
||||
@@ -44,16 +33,6 @@ This is also the default.
|
||||
write load.
|
||||
- **Increased default SQLite timeout**: The default SQLite connection timeout
|
||||
has been raised to reduce spurious timeout errors under load.
|
||||
- **Optimized stats queries**: Improved sortby handling in stats queries, fixed
|
||||
cache key passing, and simplified a stats query condition that caused an
|
||||
expensive unnecessary subquery.
|
||||
|
||||
### MetricStore performance
|
||||
|
||||
- **Sharded WAL consumer**: The WAL consumer is now sharded for significantly
|
||||
higher write throughput.
|
||||
- **NATS contention fix**: Fixed contention in the metricstore NATS ingestion
|
||||
path.
|
||||
|
||||
### NATS API
|
||||
|
||||
@@ -73,24 +52,6 @@ This is also the default.
|
||||
operation.
|
||||
- **Checkpoint archiving log**: Added an informational log message when the
|
||||
metricstore checkpoint archiving process runs.
|
||||
- **Auth failure context**: Auth failure log messages now include more context
|
||||
information.
|
||||
|
||||
### Behavior changes
|
||||
|
||||
- **DB-based metricHealth**: Replaced heuristic-based metric health with
|
||||
DB-based metric health for the node view, providing more accurate health
|
||||
status information.
|
||||
- **Removed minRunningFor filter remnants**: Cleaned up remaining `minRunningFor`
|
||||
references from the GraphQL schema and query builder.
|
||||
|
||||
### Frontend
|
||||
|
||||
- **Streamlined statsSeries**: Unified stats series calculation and rendering
|
||||
across plot components.
|
||||
- **Clarified plot titles**: Improved titles in dashboard and health views.
|
||||
- **Bumped frontend dependencies**: Updated frontend dependencies to latest
|
||||
versions.
|
||||
|
||||
### Dependencies
|
||||
|
||||
@@ -106,7 +67,7 @@ This is also the default.
|
||||
running has to be allowed to execute the journalctl command.
|
||||
- The user configuration keys for the ui have changed. Therefore old user
|
||||
configuration persisted in the database is not used anymore. It is recommended
|
||||
to configure the metrics shown in the ui-config section and remove all records
|
||||
to configure the metrics shown in the ui-config sestion and remove all records
|
||||
in the table after the update.
|
||||
- Currently energy footprint metrics of type energy are ignored for calculating
|
||||
total energy.
|
||||
|
||||
@@ -250,6 +250,12 @@ type TimeWeights {
|
||||
coreHours: [NullableFloat!]!
|
||||
}
|
||||
|
||||
enum ResampleAlgo {
|
||||
LTTB
|
||||
AVERAGE
|
||||
SIMPLE
|
||||
}
|
||||
|
||||
enum Aggregate {
|
||||
USER
|
||||
PROJECT
|
||||
@@ -340,6 +346,7 @@ type Query {
|
||||
metrics: [String!]
|
||||
scopes: [MetricScope!]
|
||||
resolution: Int
|
||||
resampleAlgo: ResampleAlgo
|
||||
): [JobMetricWithName!]!
|
||||
|
||||
jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
|
||||
@@ -399,6 +406,7 @@ type Query {
|
||||
to: Time!
|
||||
page: PageRequest
|
||||
resolution: Int
|
||||
resampleAlgo: ResampleAlgo
|
||||
): NodesResultList!
|
||||
|
||||
clusterMetrics(
|
||||
|
||||
@@ -34,12 +34,8 @@ const configString = `
|
||||
"addr": "127.0.0.1:8080",
|
||||
"short-running-jobs-duration": 300,
|
||||
"resampling": {
|
||||
"minimum-points": 600,
|
||||
"trigger": 300,
|
||||
"resolutions": [
|
||||
240,
|
||||
60
|
||||
]
|
||||
"default-policy": "medium",
|
||||
"default-algo": "lttb"
|
||||
},
|
||||
"api-allowed-ips": [
|
||||
"*"
|
||||
|
||||
@@ -344,18 +344,18 @@ func (s *Server) init() error {
|
||||
|
||||
// Server timeout defaults (in seconds)
|
||||
const (
|
||||
defaultReadHeaderTimeout = 20
|
||||
defaultWriteTimeout = 20
|
||||
defaultReadTimeout = 20
|
||||
defaultWriteTimeout = 20
|
||||
)
|
||||
|
||||
func (s *Server) Start(ctx context.Context) error {
|
||||
// Use configurable timeouts with defaults
|
||||
readHeaderTimeout := time.Duration(defaultReadHeaderTimeout) * time.Second
|
||||
readTimeout := time.Duration(defaultReadTimeout) * time.Second
|
||||
writeTimeout := time.Duration(defaultWriteTimeout) * time.Second
|
||||
|
||||
s.server = &http.Server{
|
||||
ReadHeaderTimeout: readHeaderTimeout,
|
||||
WriteTimeout: writeTimeout,
|
||||
ReadTimeout: readTimeout,
|
||||
WriteTimeout: writeTimeout,
|
||||
Handler: s.router,
|
||||
Addr: config.Keys.Addr,
|
||||
}
|
||||
|
||||
@@ -14,9 +14,8 @@
|
||||
"target-path": "./var/nodestate-archive"
|
||||
},
|
||||
"resampling": {
|
||||
"minimum-points": 600,
|
||||
"trigger": 180,
|
||||
"resolutions": [240, 60]
|
||||
"default-policy": "medium",
|
||||
"default-algo": "lttb"
|
||||
},
|
||||
"api-subjects": {
|
||||
"subject-job-event": "cc.job.event",
|
||||
|
||||
48
go.mod
48
go.mod
@@ -9,26 +9,26 @@ tool (
|
||||
|
||||
require (
|
||||
github.com/99designs/gqlgen v0.17.88
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.9.1
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.10.0
|
||||
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0
|
||||
github.com/Masterminds/squirrel v1.5.4
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.3
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.11
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.11
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.0
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.4
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.12
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.12
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.1
|
||||
github.com/coreos/go-oidc/v3 v3.17.0
|
||||
github.com/expr-lang/expr v1.17.8
|
||||
github.com/go-chi/chi/v5 v5.2.5
|
||||
github.com/go-chi/cors v1.2.2
|
||||
github.com/go-co-op/gocron/v2 v2.19.1
|
||||
github.com/go-ldap/ldap/v3 v3.4.12
|
||||
github.com/go-ldap/ldap/v3 v3.4.13
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1
|
||||
github.com/golang-migrate/migrate/v4 v4.19.1
|
||||
github.com/google/gops v0.3.29
|
||||
github.com/gorilla/sessions v1.4.0
|
||||
github.com/jmoiron/sqlx v1.4.0
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/mattn/go-sqlite3 v1.14.34
|
||||
github.com/mattn/go-sqlite3 v1.14.37
|
||||
github.com/parquet-go/parquet-go v0.29.0
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
|
||||
@@ -48,20 +48,20 @@ require (
|
||||
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||
github.com/andybalholm/brotli v1.2.0 // indirect
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.19 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.5 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.20 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.6 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.7 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.12 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.8 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.20 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.20 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.20 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.21 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.12 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.20 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.20 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.8 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.9 // indirect
|
||||
github.com/aws/smithy-go v1.24.2 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||
@@ -94,9 +94,9 @@ require (
|
||||
github.com/nats-io/nats.go v1.49.0 // indirect
|
||||
github.com/nats-io/nkeys v0.4.15 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/oapi-codegen/runtime v1.2.0 // indirect
|
||||
github.com/oapi-codegen/runtime v1.3.0 // indirect
|
||||
github.com/parquet-go/bitpack v1.0.0 // indirect
|
||||
github.com/parquet-go/jsonlite v1.4.0 // indirect
|
||||
github.com/parquet-go/jsonlite v1.5.0 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.26 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||
@@ -110,7 +110,7 @@ require (
|
||||
github.com/urfave/cli/v2 v2.27.7 // indirect
|
||||
github.com/urfave/cli/v3 v3.7.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.4 // indirect
|
||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||
golang.org/x/mod v0.34.0 // indirect
|
||||
golang.org/x/net v0.52.0 // indirect
|
||||
|
||||
100
go.sum
100
go.sum
@@ -4,8 +4,8 @@ github.com/99designs/gqlgen v0.17.88 h1:neMQDgehMwT1vYIOx/w5ZYPUU/iMNAJzRO44I5In
|
||||
github.com/99designs/gqlgen v0.17.88/go.mod h1:qeqYFEgOeSKqWedOjogPizimp2iu4E23bdPvl4jTYic=
|
||||
github.com/Azure/go-ntlmssp v0.1.0 h1:DjFo6YtWzNqNvQdrwEyr/e4nhU3vRiwenz5QX7sFz+A=
|
||||
github.com/Azure/go-ntlmssp v0.1.0/go.mod h1:NYqdhxd/8aAct/s4qSYZEerdPuH1liG2/X9DiVTbhpk=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.9.1 h1:eplKhXQyGAElBGCEGdmxwj7fLv26Op16uK0KxUePDak=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.9.1/go.mod h1:FwD8vnTIbBM3ngeLNKmCvp9FoSjQZm7xnuaVxEKR23o=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.10.0 h1:McCcEwc1j942hV54JAzsB/pcArP6A5FoQtjzW2H7K9U=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.10.0/go.mod h1:Oj+N2lpFqiBOBzjfrLIGJ2YSWT400TX4M0ii4lNl81A=
|
||||
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q=
|
||||
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0/go.mod h1:y42qUu+YFmu5fdNuUAS4VbbIKxVjxCvbVqFdpdh8ahY=
|
||||
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
|
||||
@@ -39,42 +39,42 @@ github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7D
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
|
||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
|
||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.3 h1:4kQ/fa22KjDt13QCy1+bYADvdgcxpfH18f0zP542kZA=
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.3/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o=
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 h1:N4lRUXZpZ1KVEUn6hxtco/1d2lgYhNn1fHkkl8WhlyQ=
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.11 h1:ftxI5sgz8jZkckuUHXfC/wMUc8u3fG1vQS0plr2F2Zs=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.11/go.mod h1:twF11+6ps9aNRKEDimksp923o44w/Thk9+8YIlzWMmo=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.11 h1:NdV8cwCcAXrCWyxArt58BrvZJ9pZ9Fhf9w6Uh5W3Uyc=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.11/go.mod h1:30yY2zqkMPdrvxBqzI9xQCM+WrlrZKSOpSJEsylVU+8=
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19 h1:INUvJxmhdEbVulJYHI061k4TVuS3jzzthNvjqvVvTKM=
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19/go.mod h1:FpZN2QISLdEBWkayloda+sZjVJL+e9Gl0k1SyTgcswU=
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19 h1:/sECfyq2JTifMI2JPyZ4bdRN77zJmr6SrS1eL3augIA=
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19/go.mod h1:dMf8A5oAqr9/oxOfLkC/c2LU/uMcALP0Rgn2BD5LWn0=
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.19 h1:AWeJMk33GTBf6J20XJe6qZoRSJo0WfUhsMdUKhoODXE=
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.19/go.mod h1:+GWrYoaAsV7/4pNHpwh1kiNLXkKaSoppxQq9lbH8Ejw=
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.5 h1:clHU5fm//kWS1C2HgtgWxfQbFbx4b6rx+5jzhgX9HrI=
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.5/go.mod h1:O3h0IK87yXci+kg6flUKzJnWeziQUKciKrLjcatSNcY=
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.20 h1:qi3e/dmpdONhj1RyIZdi6DKKpDXS5Lb8ftr3p7cyHJc=
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.20/go.mod h1:V1K+TeJVD5JOk3D9e5tsX2KUdL7BlB+FV6cBhdobN8c=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.6 h1:XAq62tBTJP/85lFD5oqOOe7YYgWxY9LvWq8plyDvDVg=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.6/go.mod h1:x0nZssQ3qZSnIcePWLvcoFisRXJzcTVvYpAAdYX8+GI=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11 h1:BYf7XNsJMzl4mObARUBUib+j2tf0U//JAAtTnYqvqCw=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11/go.mod h1:aEUS4WrNk/+FxkBZZa7tVgp4pGH+kFGW40Y8rCPqt5g=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19 h1:X1Tow7suZk9UCJHE1Iw9GMZJJl0dAnKXXP1NaSDHwmw=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19/go.mod h1:/rARO8psX+4sfjUQXp5LLifjUt8DuATZ31WptNJTyQA=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19 h1:JnQeStZvPHFHeyky/7LbMlyQjUa+jIBj36OlWm0pzIk=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19/go.mod h1:HGyasyHvYdFQeJhvDHfH7HXkHh57htcJGKDZ+7z+I24=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.0 h1:zyKY4OxzUImu+DigelJI9o49QQv8CjREs5E1CywjtIA=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.0/go.mod h1:NF3JcMGOiARAss1ld3WGORCw71+4ExDD2cbbdKS5PpA=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.7 h1:Y2cAXlClHsXkkOvWZFXATr34b0hxxloeQu/pAZz2row=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.7/go.mod h1:idzZ7gmDeqeNrSPkdbtMp9qWMgcBwykA7P7Rzh5DXVU=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.12 h1:iSsvB9EtQ09YrsmIc44Heqlx5ByGErqhPK1ZQLppias=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.12/go.mod h1:fEWYKTRGoZNl8tZ77i61/ccwOMJdGxwOhWCkp6TXAr0=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16 h1:EnUdUqRP1CNzt2DkV67tJx6XDN4xlfBFm+bzeNOQVb0=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16/go.mod h1:Jic/xv0Rq/pFNCh3WwpH4BEqdbSAl+IyHro8LbibHD8=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.8 h1:XQTQTF75vnug2TXS8m7CVJfC2nniYPZnO1D4Np761Oo=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.8/go.mod h1:Xgx+PR1NUOjNmQY+tRMnouRp83JRM8pRMw/vCaVhPkI=
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.4 h1:10f50G7WyU02T56ox1wWXq+zTX9I1zxG46HYuG1hH/k=
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.4/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o=
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7 h1:3kGOqnh1pPeddVa/E37XNTaWJ8W6vrbYV9lJEkCnhuY=
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.12 h1:O3csC7HUGn2895eNrLytOJQdoL2xyJy0iYXhoZ1OmP0=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.12/go.mod h1:96zTvoOFR4FURjI+/5wY1vc1ABceROO4lWgWJuxgy0g=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.12 h1:oqtA6v+y5fZg//tcTWahyN9PEn5eDU/Wpvc2+kJ4aY8=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.12/go.mod h1:U3R1RtSHx6NB0DvEQFGyf/0sbrpJrluENHdPy1j/3TE=
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.20 h1:zOgq3uezl5nznfoK3ODuqbhVg1JzAGDUhXOsU0IDCAo=
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.20/go.mod h1:z/MVwUARehy6GAg/yQ1GO2IMl0k++cu1ohP9zo887wE=
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.20 h1:CNXO7mvgThFGqOFgbNAP2nol2qAWBOGfqR/7tQlvLmc=
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.20/go.mod h1:oydPDJKcfMhgfcgBUZaG+toBbwy8yPWubJXBVERtI4o=
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.20 h1:tN6W/hg+pkM+tf9XDkWUbDEjGLb+raoBMFsTodcoYKw=
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.20/go.mod h1:YJ898MhD067hSHA6xYCx5ts/jEd8BSOLtQDL3iZsvbc=
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6 h1:qYQ4pzQ2Oz6WpQ8T3HvGHnZydA72MnLuFK9tJwmrbHw=
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6/go.mod h1:O3h0IK87yXci+kg6flUKzJnWeziQUKciKrLjcatSNcY=
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.21 h1:SwGMTMLIlvDNyhMteQ6r8IJSBPlRdXX5d4idhIGbkXA=
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.21/go.mod h1:UUxgWxofmOdAMuqEsSppbDtGKLfR04HGsD0HXzvhI1k=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 h1:5EniKhLZe4xzL7a+fU3C2tfUN4nWIqlLesfrjkuPFTY=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7/go.mod h1:x0nZssQ3qZSnIcePWLvcoFisRXJzcTVvYpAAdYX8+GI=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.12 h1:qtJZ70afD3ISKWnoX3xB0J2otEqu3LqicRcDBqsj0hQ=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.12/go.mod h1:v2pNpJbRNl4vEUWEh5ytQok0zACAKfdmKS51Hotc3pQ=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.20 h1:2HvVAIq+YqgGotK6EkMf+KIEqTISmTYh5zLpYyeTo1Y=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.20/go.mod h1:V4X406Y666khGa8ghKmphma/7C0DAtEQYhkq9z4vpbk=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.20 h1:siU1A6xjUZ2N8zjTHSXFhB9L/2OY8Dqs0xXiLjF30jA=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.20/go.mod h1:4TLZCmVJDM3FOu5P5TJP0zOlu9zWgDWU7aUxWbr+rcw=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.1 h1:csi9NLpFZXb9fxY7rS1xVzgPRGMt7MSNWeQ6eo247kE=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.1/go.mod h1:qXVal5H0ChqXP63t6jze5LmFalc7+ZE7wOdLtZ0LCP0=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.8 h1:0GFOLzEbOyZABS3PhYfBIx2rNBACYcKty+XGkTgw1ow=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.8/go.mod h1:LXypKvk85AROkKhOG6/YEcHFPoX+prKTowKnVdcaIxE=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.13 h1:kiIDLZ005EcKomYYITtfsjn7dtOwHDOFy7IbPXKek2o=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.13/go.mod h1:2h/xGEowcW/g38g06g3KpRWDlT+OTfxxI0o1KqayAB8=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17 h1:jzKAXIlhZhJbnYwHbvUQZEB8KfgAEuG0dc08Bkda7NU=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17/go.mod h1:Al9fFsXjv4KfbzQHGe6V4NZSZQXecFcvaIF4e70FoRA=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.9 h1:Cng+OOwCHmFljXIxpEVXAGMnBia8MSU6Ch5i9PgBkcU=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.9/go.mod h1:LrlIndBDdjA/EeXeyNBle+gyCwTlizzW5ycgWnvIxkk=
|
||||
github.com/aws/smithy-go v1.24.2 h1:FzA3bu/nt/vDvmnkg+R8Xl46gmzEDam6mZ1hzmwXFng=
|
||||
github.com/aws/smithy-go v1.24.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
@@ -109,8 +109,8 @@ github.com/go-co-op/gocron/v2 v2.19.1 h1:B4iLeA0NB/2iO3EKQ7NfKn5KsQgZfjb2fkvoZJU
|
||||
github.com/go-co-op/gocron/v2 v2.19.1/go.mod h1:5lEiCKk1oVJV39Zg7/YG10OnaVrDAV5GGR6O0663k6U=
|
||||
github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs=
|
||||
github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
|
||||
github.com/go-ldap/ldap/v3 v3.4.12 h1:1b81mv7MagXZ7+1r7cLTWmyuTqVqdwbtJSjC0DAp9s4=
|
||||
github.com/go-ldap/ldap/v3 v3.4.12/go.mod h1:+SPAGcTtOfmGsCb3h1RFiq4xpp4N636G75OEace8lNo=
|
||||
github.com/go-ldap/ldap/v3 v3.4.13 h1:+x1nG9h+MZN7h/lUi5Q3UZ0fJ1GyDQYbPvbuH38baDQ=
|
||||
github.com/go-ldap/ldap/v3 v3.4.13/go.mod h1:LxsGZV6vbaK0sIvYfsv47rfh4ca0JXokCoKjZxsszv0=
|
||||
github.com/go-openapi/jsonpointer v0.22.5 h1:8on/0Yp4uTb9f4XvTrM2+1CPrV05QPZXu+rvu2o9jcA=
|
||||
github.com/go-openapi/jsonpointer v0.22.5/go.mod h1:gyUR3sCvGSWchA2sUBJGluYMbe1zazrYWIkWPjjMUY0=
|
||||
github.com/go-openapi/jsonreference v0.21.5 h1:6uCGVXU/aNF13AQNggxfysJ+5ZcU4nEAe+pJyVWRdiE=
|
||||
@@ -213,8 +213,8 @@ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
|
||||
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
|
||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk=
|
||||
github.com/mattn/go-sqlite3 v1.14.34/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.37 h1:3DOZp4cXis1cUIpCfXLtmlGolNLp2VEqhiB/PARNBIg=
|
||||
github.com/mattn/go-sqlite3 v1.14.37/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 h1:KGuD/pM2JpL9FAYvBrnBBeENKZNh6eNtjqytV6TYjnk=
|
||||
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
@@ -229,13 +229,13 @@ github.com/nats-io/nkeys v0.4.15 h1:JACV5jRVO9V856KOapQ7x+EY8Jo3qw1vJt/9Jpwzkk4=
|
||||
github.com/nats-io/nkeys v0.4.15/go.mod h1:CpMchTXC9fxA5zrMo4KpySxNjiDVvr8ANOSZdiNfUrs=
|
||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||
github.com/oapi-codegen/runtime v1.2.0 h1:RvKc1CVS1QeKSNzO97FBQbSMZyQ8s6rZd+LpmzwHMP4=
|
||||
github.com/oapi-codegen/runtime v1.2.0/go.mod h1:Y7ZhmmlE8ikZOmuHRRndiIm7nf3xcVv+YMweKgG1DT0=
|
||||
github.com/oapi-codegen/runtime v1.3.0 h1:vyK1zc0gDWWXgk2xoQa4+X4RNNc5SL2RbTpJS/4vMYA=
|
||||
github.com/oapi-codegen/runtime v1.3.0/go.mod h1:kOdeacKy7t40Rclb1je37ZLFboFxh+YLy0zaPCMibPY=
|
||||
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
|
||||
github.com/parquet-go/bitpack v1.0.0 h1:AUqzlKzPPXf2bCdjfj4sTeacrUwsT7NlcYDMUQxPcQA=
|
||||
github.com/parquet-go/bitpack v1.0.0/go.mod h1:XnVk9TH+O40eOOmvpAVZ7K2ocQFrQwysLMnc6M/8lgs=
|
||||
github.com/parquet-go/jsonlite v1.4.0 h1:RTG7prqfO0HD5egejU8MUDBN8oToMj55cgSV1I0zNW4=
|
||||
github.com/parquet-go/jsonlite v1.4.0/go.mod h1:nDjpkpL4EOtqs6NQugUsi0Rleq9sW/OtC1NnZEnxzF0=
|
||||
github.com/parquet-go/jsonlite v1.5.0 h1:ulS7lNWdPwiqDMLzTiXHYmIUhu99mavZh2iAVdXet3g=
|
||||
github.com/parquet-go/jsonlite v1.5.0/go.mod h1:nDjpkpL4EOtqs6NQugUsi0Rleq9sW/OtC1NnZEnxzF0=
|
||||
github.com/parquet-go/parquet-go v0.29.0 h1:xXlPtFVR51jpSVzf+cgHnNIcb7Xet+iuvkbe0HIm90Y=
|
||||
github.com/parquet-go/parquet-go v0.29.0/go.mod h1:navtkAYr2LGoJVp141oXPlO/sxLvaOe3la2JEoD8+rg=
|
||||
github.com/pierrec/lz4/v4 v4.1.26 h1:GrpZw1gZttORinvzBdXPUXATeqlJjqUG/D87TKMnhjY=
|
||||
@@ -250,8 +250,8 @@ github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNw
|
||||
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
||||
github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4=
|
||||
github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw=
|
||||
github.com/prometheus/procfs v0.20.0 h1:AA7aCvjxwAquZAlonN7888f2u4IN8WVeFgBi4k82M4Q=
|
||||
github.com/prometheus/procfs v0.20.0/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
|
||||
github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc=
|
||||
github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0=
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0/go.mod h1:aMREyKo7fOKTwiLuWPsaHRXEmtqG4yREztO0idF83AU=
|
||||
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
||||
@@ -299,8 +299,8 @@ github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3i
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
|
||||
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
|
||||
go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ=
|
||||
go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ=
|
||||
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
|
||||
@@ -355,7 +355,7 @@ func TestRestApi(t *testing.T) {
|
||||
}
|
||||
|
||||
t.Run("CheckArchive", func(t *testing.T) {
|
||||
data, err := metricdispatch.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
|
||||
data, err := metricdispatch.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60, "")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -301,7 +301,7 @@ func (api *RestAPI) getCompleteJobByID(rw http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
|
||||
if r.URL.Query().Get("all-metrics") == "true" {
|
||||
data, err = metricdispatch.LoadData(job, nil, scopes, r.Context(), resolution)
|
||||
data, err = metricdispatch.LoadData(job, nil, scopes, r.Context(), resolution, "")
|
||||
if err != nil {
|
||||
cclog.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
@@ -397,7 +397,7 @@ func (api *RestAPI) getJobByID(rw http.ResponseWriter, r *http.Request) {
|
||||
resolution = max(resolution, mc.Timestep)
|
||||
}
|
||||
|
||||
data, err := metricdispatch.LoadData(job, metrics, scopes, r.Context(), resolution)
|
||||
data, err := metricdispatch.LoadData(job, metrics, scopes, r.Context(), resolution, "")
|
||||
if err != nil {
|
||||
cclog.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
@@ -1078,7 +1078,7 @@ func (api *RestAPI) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
resolver := graph.GetResolverInstance()
|
||||
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
|
||||
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil, nil)
|
||||
if err != nil {
|
||||
if err := json.NewEncoder(rw).Encode(Response{
|
||||
Error: &struct {
|
||||
|
||||
@@ -59,7 +59,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
|
||||
scopes = append(scopes, schema.MetricScopeAccelerator)
|
||||
}
|
||||
|
||||
jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
|
||||
jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 0, "") // 0 Resulotion-Value retrieves highest res (60s)
|
||||
if err != nil {
|
||||
cclog.Error("Error wile loading job data for archiving")
|
||||
return nil, err
|
||||
|
||||
@@ -106,12 +106,12 @@ type NodeStateRetention struct {
|
||||
}
|
||||
|
||||
type ResampleConfig struct {
|
||||
// Minimum number of points to trigger resampling of data
|
||||
MinimumPoints int `json:"minimum-points"`
|
||||
// Array of resampling target resolutions, in seconds; Example: [600,300,60]
|
||||
Resolutions []int `json:"resolutions"`
|
||||
// Trigger next zoom level at less than this many visible datapoints
|
||||
Trigger int `json:"trigger"`
|
||||
// Default resample policy when no user preference is set ("low", "medium", "high")
|
||||
DefaultPolicy string `json:"default-policy"`
|
||||
// Default resample algorithm when no user preference is set ("lttb", "average", "simple")
|
||||
DefaultAlgo string `json:"default-algo"`
|
||||
// Policy-derived target point count (set dynamically from user preference, not from config.json)
|
||||
TargetPoints int `json:"targetPoints,omitempty"`
|
||||
}
|
||||
|
||||
type NATSConfig struct {
|
||||
@@ -155,7 +155,24 @@ func Init(mainConfig json.RawMessage) {
|
||||
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
|
||||
}
|
||||
|
||||
if Keys.EnableResampling != nil && Keys.EnableResampling.MinimumPoints > 0 {
|
||||
resampler.SetMinimumRequiredPoints(Keys.EnableResampling.MinimumPoints)
|
||||
if Keys.EnableResampling != nil {
|
||||
policy := Keys.EnableResampling.DefaultPolicy
|
||||
if policy == "" {
|
||||
policy = "medium"
|
||||
}
|
||||
resampler.SetMinimumRequiredPoints(targetPointsForPolicy(policy))
|
||||
}
|
||||
}
|
||||
|
||||
func targetPointsForPolicy(policy string) int {
|
||||
switch policy {
|
||||
case "low":
|
||||
return 200
|
||||
case "medium":
|
||||
return 500
|
||||
case "high":
|
||||
return 1000
|
||||
default:
|
||||
return 500
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,23 +92,17 @@ var configSchema = `
|
||||
"description": "Enable dynamic zoom in frontend metric plots.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"minimum-points": {
|
||||
"description": "Minimum points to trigger resampling of time-series data.",
|
||||
"type": "integer"
|
||||
"default-policy": {
|
||||
"description": "Default resample policy when no user preference is set.",
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"]
|
||||
},
|
||||
"trigger": {
|
||||
"description": "Trigger next zoom level at less than this many visible datapoints.",
|
||||
"type": "integer"
|
||||
},
|
||||
"resolutions": {
|
||||
"description": "Array of resampling target resolutions, in seconds.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
"default-algo": {
|
||||
"description": "Default resample algorithm when no user preference is set.",
|
||||
"type": "string",
|
||||
"enum": ["lttb", "average", "simple"]
|
||||
}
|
||||
},
|
||||
"required": ["trigger", "resolutions"]
|
||||
}
|
||||
},
|
||||
"api-subjects": {
|
||||
"description": "NATS subjects configuration for subscribing to job and node events.",
|
||||
|
||||
@@ -326,7 +326,7 @@ type ComplexityRoot struct {
|
||||
Clusters func(childComplexity int) int
|
||||
GlobalMetrics func(childComplexity int) int
|
||||
Job func(childComplexity int, id string) int
|
||||
JobMetrics func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope, resolution *int) int
|
||||
JobMetrics func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope, resolution *int, resampleAlgo *model.ResampleAlgo) int
|
||||
JobStats func(childComplexity int, id string, metrics []string) int
|
||||
Jobs func(childComplexity int, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) int
|
||||
JobsFootprints func(childComplexity int, filter []*model.JobFilter, metrics []string) int
|
||||
@@ -334,7 +334,7 @@ type ComplexityRoot struct {
|
||||
JobsStatistics func(childComplexity int, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) int
|
||||
Node func(childComplexity int, id string) int
|
||||
NodeMetrics func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int
|
||||
NodeMetricsList func(childComplexity int, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int
|
||||
NodeMetricsList func(childComplexity int, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int, resampleAlgo *model.ResampleAlgo) int
|
||||
NodeStates func(childComplexity int, filter []*model.NodeFilter) int
|
||||
NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter, typeArg string) int
|
||||
Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
|
||||
@@ -482,7 +482,7 @@ type QueryResolver interface {
|
||||
NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error)
|
||||
NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error)
|
||||
Job(ctx context.Context, id string) (*schema.Job, error)
|
||||
JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error)
|
||||
JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int, resampleAlgo *model.ResampleAlgo) ([]*model.JobMetricWithName, error)
|
||||
JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error)
|
||||
ScopedJobStats(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.NamedStatsWithScope, error)
|
||||
Jobs(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) (*model.JobResultList, error)
|
||||
@@ -491,7 +491,7 @@ type QueryResolver interface {
|
||||
JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error)
|
||||
RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error)
|
||||
NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error)
|
||||
NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error)
|
||||
NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int, resampleAlgo *model.ResampleAlgo) (*model.NodesResultList, error)
|
||||
ClusterMetrics(ctx context.Context, cluster string, metrics []string, from time.Time, to time.Time) (*model.ClusterMetrics, error)
|
||||
}
|
||||
type SubClusterResolver interface {
|
||||
@@ -1665,7 +1665,7 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
|
||||
return 0, false
|
||||
}
|
||||
|
||||
return e.ComplexityRoot.Query.JobMetrics(childComplexity, args["id"].(string), args["metrics"].([]string), args["scopes"].([]schema.MetricScope), args["resolution"].(*int)), true
|
||||
return e.ComplexityRoot.Query.JobMetrics(childComplexity, args["id"].(string), args["metrics"].([]string), args["scopes"].([]schema.MetricScope), args["resolution"].(*int), args["resampleAlgo"].(*model.ResampleAlgo)), true
|
||||
case "Query.jobStats":
|
||||
if e.ComplexityRoot.Query.JobStats == nil {
|
||||
break
|
||||
@@ -1753,7 +1753,7 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
|
||||
return 0, false
|
||||
}
|
||||
|
||||
return e.ComplexityRoot.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["stateFilter"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int)), true
|
||||
return e.ComplexityRoot.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["stateFilter"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int), args["resampleAlgo"].(*model.ResampleAlgo)), true
|
||||
case "Query.nodeStates":
|
||||
if e.ComplexityRoot.Query.NodeStates == nil {
|
||||
break
|
||||
@@ -2524,6 +2524,12 @@ type TimeWeights {
|
||||
coreHours: [NullableFloat!]!
|
||||
}
|
||||
|
||||
enum ResampleAlgo {
|
||||
LTTB
|
||||
AVERAGE
|
||||
SIMPLE
|
||||
}
|
||||
|
||||
enum Aggregate {
|
||||
USER
|
||||
PROJECT
|
||||
@@ -2614,6 +2620,7 @@ type Query {
|
||||
metrics: [String!]
|
||||
scopes: [MetricScope!]
|
||||
resolution: Int
|
||||
resampleAlgo: ResampleAlgo
|
||||
): [JobMetricWithName!]!
|
||||
|
||||
jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
|
||||
@@ -2673,6 +2680,7 @@ type Query {
|
||||
to: Time!
|
||||
page: PageRequest
|
||||
resolution: Int
|
||||
resampleAlgo: ResampleAlgo
|
||||
): NodesResultList!
|
||||
|
||||
clusterMetrics(
|
||||
@@ -3006,6 +3014,11 @@ func (ec *executionContext) field_Query_jobMetrics_args(ctx context.Context, raw
|
||||
return nil, err
|
||||
}
|
||||
args["resolution"] = arg3
|
||||
arg4, err := graphql.ProcessArgField(ctx, rawArgs, "resampleAlgo", ec.unmarshalOResampleAlgo2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐResampleAlgo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
args["resampleAlgo"] = arg4
|
||||
return args, nil
|
||||
}
|
||||
|
||||
@@ -3183,6 +3196,11 @@ func (ec *executionContext) field_Query_nodeMetricsList_args(ctx context.Context
|
||||
return nil, err
|
||||
}
|
||||
args["resolution"] = arg9
|
||||
arg10, err := graphql.ProcessArgField(ctx, rawArgs, "resampleAlgo", ec.unmarshalOResampleAlgo2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐResampleAlgo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
args["resampleAlgo"] = arg10
|
||||
return args, nil
|
||||
}
|
||||
|
||||
@@ -9436,7 +9454,7 @@ func (ec *executionContext) _Query_jobMetrics(ctx context.Context, field graphql
|
||||
ec.fieldContext_Query_jobMetrics,
|
||||
func(ctx context.Context) (any, error) {
|
||||
fc := graphql.GetFieldContext(ctx)
|
||||
return ec.Resolvers.Query().JobMetrics(ctx, fc.Args["id"].(string), fc.Args["metrics"].([]string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["resolution"].(*int))
|
||||
return ec.Resolvers.Query().JobMetrics(ctx, fc.Args["id"].(string), fc.Args["metrics"].([]string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["resolution"].(*int), fc.Args["resampleAlgo"].(*model.ResampleAlgo))
|
||||
},
|
||||
nil,
|
||||
ec.marshalNJobMetricWithName2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobMetricWithNameᚄ,
|
||||
@@ -9917,7 +9935,7 @@ func (ec *executionContext) _Query_nodeMetricsList(ctx context.Context, field gr
|
||||
ec.fieldContext_Query_nodeMetricsList,
|
||||
func(ctx context.Context) (any, error) {
|
||||
fc := graphql.GetFieldContext(ctx)
|
||||
return ec.Resolvers.Query().NodeMetricsList(ctx, fc.Args["cluster"].(string), fc.Args["subCluster"].(string), fc.Args["stateFilter"].(string), fc.Args["nodeFilter"].(string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["metrics"].([]string), fc.Args["from"].(time.Time), fc.Args["to"].(time.Time), fc.Args["page"].(*model.PageRequest), fc.Args["resolution"].(*int))
|
||||
return ec.Resolvers.Query().NodeMetricsList(ctx, fc.Args["cluster"].(string), fc.Args["subCluster"].(string), fc.Args["stateFilter"].(string), fc.Args["nodeFilter"].(string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["metrics"].([]string), fc.Args["from"].(time.Time), fc.Args["to"].(time.Time), fc.Args["page"].(*model.PageRequest), fc.Args["resolution"].(*int), fc.Args["resampleAlgo"].(*model.ResampleAlgo))
|
||||
},
|
||||
nil,
|
||||
ec.marshalNNodesResultList2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodesResultList,
|
||||
@@ -19672,6 +19690,22 @@ func (ec *executionContext) unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockp
|
||||
return &res, graphql.ErrorOnPath(ctx, err)
|
||||
}
|
||||
|
||||
func (ec *executionContext) unmarshalOResampleAlgo2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐResampleAlgo(ctx context.Context, v any) (*model.ResampleAlgo, error) {
|
||||
if v == nil {
|
||||
return nil, nil
|
||||
}
|
||||
var res = new(model.ResampleAlgo)
|
||||
err := res.UnmarshalGQL(v)
|
||||
return res, graphql.ErrorOnPath(ctx, err)
|
||||
}
|
||||
|
||||
func (ec *executionContext) marshalOResampleAlgo2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐResampleAlgo(ctx context.Context, sel ast.SelectionSet, v *model.ResampleAlgo) graphql.Marshaler {
|
||||
if v == nil {
|
||||
return graphql.Null
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
func (ec *executionContext) unmarshalOSchedulerState2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSchedulerState(ctx context.Context, v any) (*schema.SchedulerState, error) {
|
||||
if v == nil {
|
||||
return nil, nil
|
||||
|
||||
@@ -328,6 +328,63 @@ func (e Aggregate) MarshalJSON() ([]byte, error) {
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
type ResampleAlgo string
|
||||
|
||||
const (
|
||||
ResampleAlgoLttb ResampleAlgo = "LTTB"
|
||||
ResampleAlgoAverage ResampleAlgo = "AVERAGE"
|
||||
ResampleAlgoSimple ResampleAlgo = "SIMPLE"
|
||||
)
|
||||
|
||||
var AllResampleAlgo = []ResampleAlgo{
|
||||
ResampleAlgoLttb,
|
||||
ResampleAlgoAverage,
|
||||
ResampleAlgoSimple,
|
||||
}
|
||||
|
||||
func (e ResampleAlgo) IsValid() bool {
|
||||
switch e {
|
||||
case ResampleAlgoLttb, ResampleAlgoAverage, ResampleAlgoSimple:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (e ResampleAlgo) String() string {
|
||||
return string(e)
|
||||
}
|
||||
|
||||
func (e *ResampleAlgo) UnmarshalGQL(v any) error {
|
||||
str, ok := v.(string)
|
||||
if !ok {
|
||||
return fmt.Errorf("enums must be strings")
|
||||
}
|
||||
|
||||
*e = ResampleAlgo(str)
|
||||
if !e.IsValid() {
|
||||
return fmt.Errorf("%s is not a valid ResampleAlgo", str)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e ResampleAlgo) MarshalGQL(w io.Writer) {
|
||||
fmt.Fprint(w, strconv.Quote(e.String()))
|
||||
}
|
||||
|
||||
func (e *ResampleAlgo) UnmarshalJSON(b []byte) error {
|
||||
s, err := strconv.Unquote(string(b))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return e.UnmarshalGQL(s)
|
||||
}
|
||||
|
||||
func (e ResampleAlgo) MarshalJSON() ([]byte, error) {
|
||||
var buf bytes.Buffer
|
||||
e.MarshalGQL(&buf)
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
type SortByAggregate string
|
||||
|
||||
const (
|
||||
|
||||
145
internal/graph/resample.go
Normal file
145
internal/graph/resample.go
Normal file
@@ -0,0 +1,145 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package graph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
)
|
||||
|
||||
// resolveResolutionFromPolicy reads the user's resample policy preference and
|
||||
// computes a resolution based on job duration and metric frequency. Returns nil
|
||||
// if the user has no policy set.
|
||||
func resolveResolutionFromPolicy(ctx context.Context, duration int64, cluster string, metrics []string) *int {
|
||||
user := repository.GetUserFromContext(ctx)
|
||||
if user == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
conf, err := repository.GetUserCfgRepo().GetUIConfig(user)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
policyVal, ok := conf["plotConfiguration_resamplePolicy"]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
policyStr, ok := policyVal.(string)
|
||||
if !ok || policyStr == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
policy := metricdispatch.ResamplePolicy(policyStr)
|
||||
targetPoints := metricdispatch.TargetPointsForPolicy(policy)
|
||||
if targetPoints == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Find the smallest metric frequency across the requested metrics
|
||||
frequency := smallestFrequency(cluster, metrics)
|
||||
if frequency <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
res := metricdispatch.ComputeResolution(duration, int64(frequency), targetPoints)
|
||||
return &res
|
||||
}
|
||||
|
||||
// resolveResampleAlgo returns the resampling algorithm name to use, checking
|
||||
// the explicit GraphQL parameter first, then the user's preference.
|
||||
func resolveResampleAlgo(ctx context.Context, resampleAlgo *model.ResampleAlgo) string {
|
||||
if resampleAlgo != nil {
|
||||
return strings.ToLower(resampleAlgo.String())
|
||||
}
|
||||
|
||||
user := repository.GetUserFromContext(ctx)
|
||||
if user == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
conf, err := repository.GetUserCfgRepo().GetUIConfig(user)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
algoVal, ok := conf["plotConfiguration_resampleAlgo"]
|
||||
if ok {
|
||||
if algoStr, ok := algoVal.(string); ok && algoStr != "" {
|
||||
return algoStr
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to global default algo
|
||||
if config.Keys.EnableResampling != nil && config.Keys.EnableResampling.DefaultAlgo != "" {
|
||||
return config.Keys.EnableResampling.DefaultAlgo
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveResolutionFromDefaultPolicy computes a resolution using the global
|
||||
// default policy from config. Returns nil if no policy is configured.
|
||||
func resolveResolutionFromDefaultPolicy(duration int64, cluster string, metrics []string) *int {
|
||||
cfg := config.Keys.EnableResampling
|
||||
if cfg == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
policyStr := cfg.DefaultPolicy
|
||||
if policyStr == "" {
|
||||
policyStr = "medium"
|
||||
}
|
||||
|
||||
policy := metricdispatch.ResamplePolicy(policyStr)
|
||||
targetPoints := metricdispatch.TargetPointsForPolicy(policy)
|
||||
if targetPoints == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
frequency := smallestFrequency(cluster, metrics)
|
||||
if frequency <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
res := metricdispatch.ComputeResolution(duration, int64(frequency), targetPoints)
|
||||
return &res
|
||||
}
|
||||
|
||||
// smallestFrequency returns the smallest metric timestep (in seconds) among the
|
||||
// requested metrics for the given cluster. Falls back to 0 if nothing is found.
|
||||
func smallestFrequency(cluster string, metrics []string) int {
|
||||
cl := archive.GetCluster(cluster)
|
||||
if cl == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
minFreq := 0
|
||||
for _, mc := range cl.MetricConfig {
|
||||
if len(metrics) > 0 {
|
||||
found := false
|
||||
for _, m := range metrics {
|
||||
if mc.Name == m {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if minFreq == 0 || mc.Timestep < minFreq {
|
||||
minFreq = mc.Timestep
|
||||
}
|
||||
}
|
||||
|
||||
return minFreq
|
||||
}
|
||||
@@ -498,24 +498,30 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
||||
}
|
||||
|
||||
// JobMetrics is the resolver for the jobMetrics field.
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
|
||||
if resolution == nil { // Load from Config
|
||||
if config.Keys.EnableResampling != nil {
|
||||
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||
resolution = &defaultRes
|
||||
} else { // Set 0 (Loads configured metric timestep)
|
||||
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
}
|
||||
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int, resampleAlgo *model.ResampleAlgo) ([]*model.JobMetricWithName, error) {
|
||||
job, err := r.Query().Job(ctx, id)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while querying job for metrics")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := metricdispatch.LoadData(job, metrics, scopes, ctx, *resolution)
|
||||
// Resolve resolution: explicit param > user policy > global config > 0
|
||||
if resolution == nil {
|
||||
resolution = resolveResolutionFromPolicy(ctx, int64(job.Duration), job.Cluster, metrics)
|
||||
}
|
||||
if resolution == nil {
|
||||
if config.Keys.EnableResampling != nil {
|
||||
resolution = resolveResolutionFromDefaultPolicy(int64(job.Duration), job.Cluster, metrics)
|
||||
}
|
||||
if resolution == nil {
|
||||
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
}
|
||||
|
||||
algoName := resolveResampleAlgo(ctx, resampleAlgo)
|
||||
|
||||
data, err := metricdispatch.LoadData(job, metrics, scopes, ctx, *resolution, algoName)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while loading job data")
|
||||
return nil, err
|
||||
@@ -676,11 +682,6 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
// Use request-scoped cache: multiple aliases with same (filter, groupBy)
|
||||
// but different sortBy/page hit the DB only once.
|
||||
if cache := getStatsGroupCache(ctx); cache != nil {
|
||||
// Ensure the sort field is computed even if not in the GraphQL selection,
|
||||
// because sortAndPageStats will sort by it in memory.
|
||||
if sortBy != nil {
|
||||
reqFields[sortByFieldName(*sortBy)] = true
|
||||
}
|
||||
key := statsCacheKey(filter, groupBy, reqFields)
|
||||
var allStats []*model.JobsStatistics
|
||||
allStats, err = cache.getOrCompute(key, func() ([]*model.JobsStatistics, error) {
|
||||
@@ -877,12 +878,17 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
||||
}
|
||||
|
||||
// NodeMetricsList is the resolver for the nodeMetricsList field.
|
||||
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
|
||||
if resolution == nil { // Load from Config
|
||||
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int, resampleAlgo *model.ResampleAlgo) (*model.NodesResultList, error) {
|
||||
// Resolve resolution: explicit param > user policy > global config > 0
|
||||
duration := int64(to.Sub(from).Seconds())
|
||||
if resolution == nil {
|
||||
resolution = resolveResolutionFromPolicy(ctx, duration, cluster, metrics)
|
||||
}
|
||||
if resolution == nil {
|
||||
if config.Keys.EnableResampling != nil {
|
||||
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||
resolution = &defaultRes
|
||||
} else { // Set 0 (Loads configured metric timestep)
|
||||
resolution = resolveResolutionFromDefaultPolicy(duration, cluster, metrics)
|
||||
}
|
||||
if resolution == nil {
|
||||
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
@@ -906,8 +912,10 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
|
||||
}
|
||||
}
|
||||
|
||||
algoName := resolveResampleAlgo(ctx, resampleAlgo)
|
||||
|
||||
// data -> map hostname:jobdata
|
||||
data, err := metricdispatch.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx)
|
||||
data, err := metricdispatch.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx, algoName)
|
||||
if err != nil {
|
||||
cclog.Warn("error while loading node data (Resolver.NodeMetricsList")
|
||||
return nil, err
|
||||
|
||||
@@ -107,33 +107,6 @@ func sortAndPageStats(allStats []*model.JobsStatistics, sortBy *model.SortByAggr
|
||||
return sorted
|
||||
}
|
||||
|
||||
// sortByFieldName maps a SortByAggregate enum to the corresponding reqFields key.
|
||||
// This ensures the DB computes the column that sortAndPageStats will sort by.
|
||||
func sortByFieldName(sortBy model.SortByAggregate) string {
|
||||
switch sortBy {
|
||||
case model.SortByAggregateTotaljobs:
|
||||
return "totalJobs"
|
||||
case model.SortByAggregateTotalusers:
|
||||
return "totalUsers"
|
||||
case model.SortByAggregateTotalwalltime:
|
||||
return "totalWalltime"
|
||||
case model.SortByAggregateTotalnodes:
|
||||
return "totalNodes"
|
||||
case model.SortByAggregateTotalnodehours:
|
||||
return "totalNodeHours"
|
||||
case model.SortByAggregateTotalcores:
|
||||
return "totalCores"
|
||||
case model.SortByAggregateTotalcorehours:
|
||||
return "totalCoreHours"
|
||||
case model.SortByAggregateTotalaccs:
|
||||
return "totalAccs"
|
||||
case model.SortByAggregateTotalacchours:
|
||||
return "totalAccHours"
|
||||
default:
|
||||
return "totalJobs"
|
||||
}
|
||||
}
|
||||
|
||||
// statsFieldGetter returns a function that extracts the sortable int field
|
||||
// from a JobsStatistics struct for the given sort key.
|
||||
func statsFieldGetter(sortBy model.SortByAggregate) func(*model.JobsStatistics) int {
|
||||
|
||||
@@ -55,7 +55,7 @@ func (r *queryResolver) rooflineHeatmap(
|
||||
// resolution = max(resolution, mc.Timestep)
|
||||
// }
|
||||
|
||||
jobdata, err := metricdispatch.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
|
||||
jobdata, err := metricdispatch.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0, "")
|
||||
if err != nil {
|
||||
cclog.Warnf("Error while loading roofline metrics for job %d", *job.ID)
|
||||
return nil, err
|
||||
|
||||
@@ -62,9 +62,10 @@ func cacheKey(
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
resampleAlgo string,
|
||||
) string {
|
||||
return fmt.Sprintf("%d(%s):[%v],[%v]-%d",
|
||||
*job.ID, job.State, metrics, scopes, resolution)
|
||||
return fmt.Sprintf("%d(%s):[%v],[%v]-%d-%s",
|
||||
*job.ID, job.State, metrics, scopes, resolution, resampleAlgo)
|
||||
}
|
||||
|
||||
// LoadData retrieves metric data for a job from the appropriate backend (memory store for running jobs,
|
||||
@@ -87,8 +88,9 @@ func LoadData(job *schema.Job,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context,
|
||||
resolution int,
|
||||
resampleAlgo string,
|
||||
) (schema.JobData, error) {
|
||||
data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ any, ttl time.Duration, size int) {
|
||||
data := cache.Get(cacheKey(job, metrics, scopes, resolution, resampleAlgo), func() (_ any, ttl time.Duration, size int) {
|
||||
var jd schema.JobData
|
||||
var err error
|
||||
|
||||
@@ -136,13 +138,17 @@ func LoadData(job *schema.Job,
|
||||
|
||||
jd = deepCopy(jdTemp)
|
||||
|
||||
// Resample archived data using Largest Triangle Three Bucket algorithm to reduce data points
|
||||
// to the requested resolution, improving transfer performance and client-side rendering.
|
||||
// Resample archived data to reduce data points to the requested resolution,
|
||||
// improving transfer performance and client-side rendering.
|
||||
resampleFn, rfErr := resampler.GetResampler(resampleAlgo)
|
||||
if rfErr != nil {
|
||||
return rfErr, 0, 0
|
||||
}
|
||||
for _, v := range jd {
|
||||
for _, v_ := range v {
|
||||
timestep := int64(0)
|
||||
for i := 0; i < len(v_.Series); i += 1 {
|
||||
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, int64(v_.Timestep), int64(resolution))
|
||||
v_.Series[i].Data, timestep, err = resampleFn(v_.Series[i].Data, int64(v_.Timestep), int64(resolution))
|
||||
if err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
@@ -414,6 +420,7 @@ func LoadNodeListData(
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
resampleAlgo string,
|
||||
) (map[string]schema.JobData, error) {
|
||||
if metrics == nil {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
@@ -428,7 +435,7 @@ func LoadNodeListData(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := ms.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, resolution, from, to, ctx)
|
||||
data, err := ms.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, resolution, from, to, ctx, resampleAlgo)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
cclog.Warnf("partial error loading node list data from metric store for cluster %s, subcluster %s: %s",
|
||||
|
||||
@@ -51,7 +51,8 @@ type MetricDataRepository interface {
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
ctx context.Context) (map[string]schema.JobData, error)
|
||||
ctx context.Context,
|
||||
resampleAlgo string) (map[string]schema.JobData, error)
|
||||
|
||||
// HealthCheck evaluates the monitoring state for a set of nodes against expected metrics.
|
||||
HealthCheck(cluster string,
|
||||
|
||||
49
internal/metricdispatch/resamplepolicy.go
Normal file
49
internal/metricdispatch/resamplepolicy.go
Normal file
@@ -0,0 +1,49 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package metricdispatch
|
||||
|
||||
import "math"
|
||||
|
||||
type ResamplePolicy string
|
||||
|
||||
const (
|
||||
ResamplePolicyLow ResamplePolicy = "low"
|
||||
ResamplePolicyMedium ResamplePolicy = "medium"
|
||||
ResamplePolicyHigh ResamplePolicy = "high"
|
||||
)
|
||||
|
||||
// TargetPointsForPolicy returns the target number of data points for a given policy.
|
||||
func TargetPointsForPolicy(policy ResamplePolicy) int {
|
||||
switch policy {
|
||||
case ResamplePolicyLow:
|
||||
return 200
|
||||
case ResamplePolicyMedium:
|
||||
return 500
|
||||
case ResamplePolicyHigh:
|
||||
return 1000
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// ComputeResolution computes the resampling resolution in seconds for a given
|
||||
// job duration, metric frequency, and target point count. Returns 0 if the
|
||||
// total number of data points is already at or below targetPoints (no resampling needed).
|
||||
func ComputeResolution(duration int64, frequency int64, targetPoints int) int {
|
||||
if frequency <= 0 || targetPoints <= 0 || duration <= 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
totalPoints := duration / frequency
|
||||
if totalPoints <= int64(targetPoints) {
|
||||
return 0
|
||||
}
|
||||
|
||||
targetRes := math.Ceil(float64(duration) / float64(targetPoints))
|
||||
// Round up to nearest multiple of frequency
|
||||
resolution := int(math.Ceil(targetRes/float64(frequency))) * int(frequency)
|
||||
|
||||
return resolution
|
||||
}
|
||||
68
internal/metricdispatch/resamplepolicy_test.go
Normal file
68
internal/metricdispatch/resamplepolicy_test.go
Normal file
@@ -0,0 +1,68 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package metricdispatch
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTargetPointsForPolicy(t *testing.T) {
|
||||
tests := []struct {
|
||||
policy ResamplePolicy
|
||||
want int
|
||||
}{
|
||||
{ResamplePolicyLow, 200},
|
||||
{ResamplePolicyMedium, 500},
|
||||
{ResamplePolicyHigh, 1000},
|
||||
{ResamplePolicy("unknown"), 0},
|
||||
{ResamplePolicy(""), 0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
if got := TargetPointsForPolicy(tt.policy); got != tt.want {
|
||||
t.Errorf("TargetPointsForPolicy(%q) = %d, want %d", tt.policy, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeResolution(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
duration int64
|
||||
frequency int64
|
||||
targetPoints int
|
||||
want int
|
||||
}{
|
||||
// 24h job, 60s frequency, 1440 total points
|
||||
{"low_24h_60s", 86400, 60, 200, 480},
|
||||
{"medium_24h_60s", 86400, 60, 500, 180},
|
||||
{"high_24h_60s", 86400, 60, 1000, 120},
|
||||
|
||||
// 2h job, 60s frequency, 120 total points — no resampling needed
|
||||
{"low_2h_60s", 7200, 60, 200, 0},
|
||||
{"medium_2h_60s", 7200, 60, 500, 0},
|
||||
{"high_2h_60s", 7200, 60, 1000, 0},
|
||||
|
||||
// Edge: zero/negative inputs
|
||||
{"zero_duration", 0, 60, 200, 0},
|
||||
{"zero_frequency", 86400, 0, 200, 0},
|
||||
{"zero_target", 86400, 60, 0, 0},
|
||||
{"negative_duration", -100, 60, 200, 0},
|
||||
|
||||
// 12h job, 30s frequency, 1440 total points
|
||||
{"medium_12h_30s", 43200, 30, 500, 90},
|
||||
|
||||
// Exact fit: total points == target points
|
||||
{"exact_fit", 12000, 60, 200, 0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := ComputeResolution(tt.duration, tt.frequency, tt.targetPoints)
|
||||
if got != tt.want {
|
||||
t.Errorf("ComputeResolution(%d, %d, %d) = %d, want %d",
|
||||
tt.duration, tt.frequency, tt.targetPoints, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -617,6 +617,7 @@ func (ccms *CCMetricStore) LoadNodeListData(
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
resampleAlgo string,
|
||||
) (map[string]schema.JobData, error) {
|
||||
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution)
|
||||
if err != nil {
|
||||
|
||||
BIN
internal/repository/testdata/job.db
vendored
BIN
internal/repository/testdata/job.db
vendored
Binary file not shown.
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
@@ -493,13 +494,15 @@ func SetupRoutes(router chi.Router, buildInfo web.Build) {
|
||||
// Get Roles
|
||||
availableRoles, _ := schema.GetValidRolesMap(user)
|
||||
|
||||
resampling := resamplingForUser(conf)
|
||||
|
||||
page := web.Page{
|
||||
Title: title,
|
||||
User: *user,
|
||||
Roles: availableRoles,
|
||||
Build: buildInfo,
|
||||
Config: conf,
|
||||
Resampling: config.Keys.EnableResampling,
|
||||
Resampling: resampling,
|
||||
Infos: infos,
|
||||
}
|
||||
|
||||
@@ -586,3 +589,36 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil
|
||||
web.RenderTemplate(rw, "message.tmpl", &web.Page{Title: "Warning", MsgType: "alert-warning", Message: "Empty search", User: *user, Roles: availableRoles, Build: buildInfo})
|
||||
}
|
||||
}
|
||||
|
||||
// resamplingForUser returns a ResampleConfig that incorporates the user's
|
||||
// resample policy preference. If the user has a policy set, it creates a
|
||||
// policy-derived config with targetPoints and trigger. Otherwise falls back
|
||||
// to the global config.
|
||||
func resamplingForUser(conf map[string]any) *config.ResampleConfig {
|
||||
globalCfg := config.Keys.EnableResampling
|
||||
|
||||
policyStr := ""
|
||||
if policyVal, ok := conf["plotConfiguration_resamplePolicy"]; ok {
|
||||
if s, ok := policyVal.(string); ok {
|
||||
policyStr = s
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to global default policy, then to "medium"
|
||||
if policyStr == "" && globalCfg != nil {
|
||||
policyStr = globalCfg.DefaultPolicy
|
||||
}
|
||||
if policyStr == "" {
|
||||
policyStr = "medium"
|
||||
}
|
||||
|
||||
policy := metricdispatch.ResamplePolicy(policyStr)
|
||||
targetPoints := metricdispatch.TargetPointsForPolicy(policy)
|
||||
if targetPoints == 0 {
|
||||
return globalCfg
|
||||
}
|
||||
|
||||
return &config.ResampleConfig{
|
||||
TargetPoints: targetPoints,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -198,19 +198,36 @@ func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
|
||||
func GetMetricConfigSubCluster(cluster, subcluster string) map[string]*schema.Metric {
|
||||
metrics := make(map[string]*schema.Metric)
|
||||
|
||||
sc, err := GetSubCluster(cluster, subcluster)
|
||||
if err != nil {
|
||||
return metrics
|
||||
}
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
for _, m := range c.MetricConfig {
|
||||
for _, s := range m.SubClusters {
|
||||
if s.Name == subcluster {
|
||||
metrics[m.Name] = &schema.Metric{
|
||||
Name: m.Name,
|
||||
Unit: s.Unit,
|
||||
Peak: s.Peak,
|
||||
Normal: s.Normal,
|
||||
Caution: s.Caution,
|
||||
Alert: s.Alert,
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for _, m := range sc.MetricConfig {
|
||||
metrics[m.Name] = &schema.Metric{
|
||||
Name: m.Name,
|
||||
Unit: m.Unit,
|
||||
Peak: m.Peak,
|
||||
Normal: m.Normal,
|
||||
Caution: m.Caution,
|
||||
Alert: m.Alert,
|
||||
_, ok := metrics[m.Name]
|
||||
if !ok {
|
||||
metrics[m.Name] = &schema.Metric{
|
||||
Name: m.Name,
|
||||
Unit: m.Unit,
|
||||
Peak: m.Peak,
|
||||
Normal: m.Normal,
|
||||
Caution: m.Caution,
|
||||
Alert: m.Alert,
|
||||
}
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -37,27 +37,3 @@ func TestClusterConfig(t *testing.T) {
|
||||
// spew.Dump(archive.GlobalMetricList)
|
||||
// t.Fail()
|
||||
}
|
||||
|
||||
func TestGetMetricConfigSubClusterRespectsRemovedMetrics(t *testing.T) {
|
||||
if err := archive.Init(json.RawMessage(`{"kind": "file","path": "testdata/archive"}`)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
sc, err := archive.GetSubCluster("fritz", "spr2tb")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
metrics := archive.GetMetricConfigSubCluster("fritz", "spr2tb")
|
||||
if len(metrics) != len(sc.MetricConfig) {
|
||||
t.Fatalf("GetMetricConfigSubCluster() returned %d metrics, want %d", len(metrics), len(sc.MetricConfig))
|
||||
}
|
||||
|
||||
if _, ok := metrics["flops_any"]; ok {
|
||||
t.Fatalf("GetMetricConfigSubCluster() returned removed metric flops_any for subcluster spr2tb")
|
||||
}
|
||||
|
||||
if _, ok := metrics["cpu_power"]; !ok {
|
||||
t.Fatalf("GetMetricConfigSubCluster() missing active metric cpu_power for subcluster spr2tb")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,14 +51,15 @@ type APIMetricData struct {
|
||||
//
|
||||
// The request can be customized with flags to include/exclude statistics, raw data, and padding.
|
||||
type APIQueryRequest struct {
|
||||
Cluster string `json:"cluster"`
|
||||
Queries []APIQuery `json:"queries"`
|
||||
ForAllNodes []string `json:"for-all-nodes"`
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
WithStats bool `json:"with-stats"`
|
||||
WithData bool `json:"with-data"`
|
||||
WithPadding bool `json:"with-padding"`
|
||||
Cluster string `json:"cluster"`
|
||||
Queries []APIQuery `json:"queries"`
|
||||
ForAllNodes []string `json:"for-all-nodes"`
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
WithStats bool `json:"with-stats"`
|
||||
WithData bool `json:"with-data"`
|
||||
WithPadding bool `json:"with-padding"`
|
||||
ResampleAlgo string `json:"resample-algo,omitempty"`
|
||||
}
|
||||
|
||||
// APIQueryResponse represents the response to an APIQueryRequest.
|
||||
@@ -279,7 +280,7 @@ func FetchData(req APIQueryRequest) (*APIQueryResponse, error) {
|
||||
for _, sel := range sels {
|
||||
data := APIMetricData{}
|
||||
|
||||
data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)
|
||||
data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution, req.ResampleAlgo)
|
||||
if err != nil {
|
||||
// Skip Error If Just Missing Host or Metric, Continue
|
||||
// Empty Return For Metric Handled Gracefully By Frontend
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@@ -23,7 +22,6 @@ import (
|
||||
|
||||
func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
|
||||
if Keys.Cleanup.Mode == "archive" {
|
||||
cclog.Info("[METRICSTORE]> enable archive cleanup to parquet")
|
||||
// Run as Archiver
|
||||
cleanUpWorker(wg, ctx,
|
||||
Keys.RetentionInMemory,
|
||||
@@ -45,6 +43,7 @@ func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
|
||||
// cleanUpWorker takes simple values to configure what it does
|
||||
func cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {
|
||||
wg.Go(func() {
|
||||
|
||||
d, err := time.ParseDuration(interval)
|
||||
if err != nil {
|
||||
cclog.Fatalf("[METRICSTORE]> error parsing %s interval duration: %v\n", mode, err)
|
||||
@@ -100,8 +99,8 @@ func deleteCheckpoints(checkpointsDir string, from int64) (int, error) {
|
||||
}
|
||||
|
||||
type workItem struct {
|
||||
dir string
|
||||
cluster, host string
|
||||
dir string
|
||||
cluster, host string
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
@@ -182,7 +181,6 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
}
|
||||
|
||||
totalFiles := 0
|
||||
var clusterErrors []string
|
||||
|
||||
for _, clusterEntry := range clusterEntries {
|
||||
if !clusterEntry.IsDir() {
|
||||
@@ -192,9 +190,7 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
cluster := clusterEntry.Name()
|
||||
hostEntries, err := os.ReadDir(filepath.Join(checkpointsDir, cluster))
|
||||
if err != nil {
|
||||
cclog.Errorf("[METRICSTORE]> error reading host entries for cluster %s: %s", cluster, err.Error())
|
||||
clusterErrors = append(clusterErrors, cluster)
|
||||
continue
|
||||
return totalFiles, err
|
||||
}
|
||||
|
||||
// Workers load checkpoint files from disk; main thread writes to parquet.
|
||||
@@ -259,9 +255,7 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
// Drain results channel to unblock workers
|
||||
for range results {
|
||||
}
|
||||
cclog.Errorf("[METRICSTORE]> error creating parquet writer for cluster %s: %s", cluster, err.Error())
|
||||
clusterErrors = append(clusterErrors, cluster)
|
||||
continue
|
||||
return totalFiles, fmt.Errorf("creating parquet writer for cluster %s: %w", cluster, err)
|
||||
}
|
||||
|
||||
type deleteItem struct {
|
||||
@@ -281,12 +275,6 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
break
|
||||
}
|
||||
}
|
||||
// Flush once per host to keep row group count within parquet limits.
|
||||
if writeErr == nil {
|
||||
if err := writer.FlushRowGroup(); err != nil {
|
||||
writeErr = err
|
||||
}
|
||||
}
|
||||
}
|
||||
// Always track files for deletion (even if write failed, we still drain)
|
||||
toDelete = append(toDelete, deleteItem{dir: r.dir, files: r.files})
|
||||
@@ -297,10 +285,7 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
}
|
||||
|
||||
if errs > 0 {
|
||||
cclog.Errorf("[METRICSTORE]> %d errors reading checkpoints for cluster %s", errs, cluster)
|
||||
clusterErrors = append(clusterErrors, cluster)
|
||||
os.Remove(parquetFile)
|
||||
continue
|
||||
return totalFiles, fmt.Errorf("%d errors reading checkpoints for cluster %s", errs, cluster)
|
||||
}
|
||||
|
||||
if writer.count == 0 {
|
||||
@@ -311,9 +296,7 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
|
||||
if writeErr != nil {
|
||||
os.Remove(parquetFile)
|
||||
cclog.Errorf("[METRICSTORE]> error writing parquet archive for cluster %s: %s", cluster, writeErr.Error())
|
||||
clusterErrors = append(clusterErrors, cluster)
|
||||
continue
|
||||
return totalFiles, fmt.Errorf("writing parquet archive for cluster %s: %w", cluster, writeErr)
|
||||
}
|
||||
|
||||
// Delete archived checkpoint files
|
||||
@@ -333,10 +316,5 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
}
|
||||
|
||||
cclog.Infof("[METRICSTORE]> archiving checkpoints completed in %s (%d files)", time.Since(startTime).Round(time.Millisecond), totalFiles)
|
||||
|
||||
if len(clusterErrors) > 0 {
|
||||
return totalFiles, fmt.Errorf("archiving failed for clusters: %s", strings.Join(clusterErrors, ", "))
|
||||
}
|
||||
|
||||
return totalFiles, nil
|
||||
}
|
||||
|
||||
@@ -679,7 +679,7 @@ func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metric
|
||||
// If the level does not hold the metric itself, the data will be aggregated recursively from the children.
|
||||
// The second and third return value are the actual from/to for the data. Those can be different from
|
||||
// the range asked for if no data was available.
|
||||
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {
|
||||
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64, resampleAlgo string) ([]schema.Float, int64, int64, int64, error) {
|
||||
if from > to {
|
||||
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range")
|
||||
}
|
||||
@@ -737,7 +737,11 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, reso
|
||||
}
|
||||
}
|
||||
|
||||
data, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)
|
||||
resampleFn, rfErr := resampler.GetResampler(resampleAlgo)
|
||||
if rfErr != nil {
|
||||
return nil, 0, 0, 0, rfErr
|
||||
}
|
||||
data, resolution, err = resampleFn(data, minfo.Frequency, resolution)
|
||||
if err != nil {
|
||||
return nil, 0, 0, 0, err
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ func newParquetArchiveWriter(filename string) (*parquetArchiveWriter, error) {
|
||||
|
||||
// WriteCheckpointFile streams a CheckpointFile tree directly to Parquet rows,
|
||||
// writing metrics in sorted order without materializing all rows in memory.
|
||||
// Call FlushRowGroup() after writing all checkpoint files for a host.
|
||||
// Produces one row group per call (typically one host's data).
|
||||
func (w *parquetArchiveWriter) WriteCheckpointFile(cf *CheckpointFile, cluster, hostname, scope, scopeID string) error {
|
||||
w.writeLevel(cf, cluster, hostname, scope, scopeID)
|
||||
|
||||
@@ -112,15 +112,10 @@ func (w *parquetArchiveWriter) WriteCheckpointFile(cf *CheckpointFile, cluster,
|
||||
w.batch = w.batch[:0]
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FlushRowGroup flushes the current row group to the Parquet file.
|
||||
// Should be called once per host after all checkpoint files for that host are written.
|
||||
func (w *parquetArchiveWriter) FlushRowGroup() error {
|
||||
if err := w.writer.Flush(); err != nil {
|
||||
return fmt.Errorf("flushing parquet row group: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -621,6 +621,7 @@ func (ccms *InternalMetricStore) LoadNodeListData(
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
resampleAlgo string,
|
||||
) (map[string]schema.JobData, error) {
|
||||
// Note: Order of node data is not guaranteed after this point
|
||||
queries, assignedScope, err := buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution))
|
||||
@@ -636,12 +637,13 @@ func (ccms *InternalMetricStore) LoadNodeListData(
|
||||
}
|
||||
|
||||
req := APIQueryRequest{
|
||||
Cluster: cluster,
|
||||
Queries: queries,
|
||||
From: from.Unix(),
|
||||
To: to.Unix(),
|
||||
WithStats: true,
|
||||
WithData: true,
|
||||
Cluster: cluster,
|
||||
Queries: queries,
|
||||
From: from.Unix(),
|
||||
To: to.Unix(),
|
||||
WithStats: true,
|
||||
WithData: true,
|
||||
ResampleAlgo: resampleAlgo,
|
||||
}
|
||||
|
||||
resBody, err := FetchData(req)
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
import Options from "./admin/Options.svelte";
|
||||
import NoticeEdit from "./admin/NoticeEdit.svelte";
|
||||
import RunTaggers from "./admin/RunTaggers.svelte";
|
||||
import PlotRenderOptions from "./user/PlotRenderOptions.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
@@ -29,6 +30,8 @@
|
||||
/* State Init */
|
||||
let users = $state([]);
|
||||
let roles = $state([]);
|
||||
let message = $state({ msg: "", target: "", color: "#d63384" });
|
||||
let displayMessage = $state(false);
|
||||
|
||||
/* Functions */
|
||||
function getUserList() {
|
||||
@@ -52,6 +55,37 @@
|
||||
getValidRoles();
|
||||
}
|
||||
|
||||
async function handleSettingSubmit(event, setting) {
|
||||
event.preventDefault();
|
||||
|
||||
const selector = setting.selector
|
||||
const target = setting.target
|
||||
let form = document.querySelector(selector);
|
||||
let formData = new FormData(form);
|
||||
try {
|
||||
const res = await fetch(form.action, { method: "POST", body: formData });
|
||||
if (res.ok) {
|
||||
let text = await res.text();
|
||||
popMessage(text, target, "#048109");
|
||||
} else {
|
||||
let text = await res.text();
|
||||
throw new Error("Response Code " + res.status + "-> " + text);
|
||||
}
|
||||
} catch (err) {
|
||||
popMessage(err, target, "#d63384");
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
function popMessage(response, restarget, rescolor) {
|
||||
message = { msg: response, target: restarget, color: rescolor };
|
||||
displayMessage = true;
|
||||
setTimeout(function () {
|
||||
displayMessage = false;
|
||||
}, 3500);
|
||||
}
|
||||
|
||||
/* on Mount */
|
||||
onMount(() => initAdmin());
|
||||
</script>
|
||||
@@ -73,3 +107,4 @@
|
||||
<NoticeEdit {ncontent}/>
|
||||
<RunTaggers />
|
||||
</Row>
|
||||
<PlotRenderOptions config={ccconfig} bind:message bind:displayMessage updateSetting={(e, newSetting) => handleSettingSubmit(e, newSetting)}/>
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
Card,
|
||||
CardTitle,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import { getContext } from "svelte";
|
||||
import { fade } from "svelte/transition";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
@@ -25,6 +26,8 @@
|
||||
displayMessage = $bindable(),
|
||||
updateSetting
|
||||
} = $props();
|
||||
|
||||
const resampleConfig = getContext("resampling");
|
||||
</script>
|
||||
|
||||
<Row cols={3} class="p-2 g-2">
|
||||
@@ -64,7 +67,7 @@
|
||||
id="lwvalue"
|
||||
name="value"
|
||||
aria-describedby="lineWidthHelp"
|
||||
value={config.plotConfiguration_lineWidth}
|
||||
value={config?.plotConfiguration_lineWidth}
|
||||
min="1"
|
||||
/>
|
||||
<div id="lineWidthHelp" class="form-text">
|
||||
@@ -111,7 +114,7 @@
|
||||
id="pprvalue"
|
||||
name="value"
|
||||
aria-describedby="plotsperrowHelp"
|
||||
value={config.plotConfiguration_plotsPerRow}
|
||||
value={config?.plotConfiguration_plotsPerRow}
|
||||
min="1"
|
||||
/>
|
||||
<div id="plotsperrowHelp" class="form-text">
|
||||
@@ -153,7 +156,7 @@
|
||||
<input type="hidden" name="key" value="plotConfiguration_colorBackground" />
|
||||
<div class="mb-3">
|
||||
<div>
|
||||
{#if config.plotConfiguration_colorBackground}
|
||||
{#if config?.plotConfiguration_colorBackground}
|
||||
<input type="radio" id="colb-true-checked" name="value" value="true" checked />
|
||||
{:else}
|
||||
<input type="radio" id="colb-true" name="value" value="true" />
|
||||
@@ -161,7 +164,7 @@
|
||||
<label for="true">Yes</label>
|
||||
</div>
|
||||
<div>
|
||||
{#if config.plotConfiguration_colorBackground}
|
||||
{#if config?.plotConfiguration_colorBackground}
|
||||
<input type="radio" id="colb-false" name="value" value="false" />
|
||||
{:else}
|
||||
<input type="radio" id="colb-false-checked" name="value" value="false" checked />
|
||||
@@ -219,4 +222,90 @@
|
||||
</form>
|
||||
</Card>
|
||||
</Col>
|
||||
|
||||
<!-- RESAMPLE POLICY -->
|
||||
<Col>
|
||||
<Card class="h-100">
|
||||
<form
|
||||
id="resample-policy-form"
|
||||
method="post"
|
||||
action="/frontend/configuration/"
|
||||
class="card-body"
|
||||
onsubmit={(e) => updateSetting(e, {
|
||||
selector: "#resample-policy-form",
|
||||
target: "rsp",
|
||||
})}
|
||||
>
|
||||
<CardTitle
|
||||
style="margin-bottom: 1em; display: flex; align-items: center;"
|
||||
>
|
||||
<div>Resample Policy</div>
|
||||
{#if displayMessage && message.target == "rsp"}
|
||||
<div style="margin-left: auto; font-size: 0.9em;">
|
||||
<code style="color: {message.color};" out:fade>
|
||||
Update: {message.msg}
|
||||
</code>
|
||||
</div>
|
||||
{/if}
|
||||
</CardTitle>
|
||||
<input type="hidden" name="key" value="plotConfiguration_resamplePolicy" />
|
||||
<div class="mb-3">
|
||||
{#each [["", "Default"], ["low", "Low"], ["medium", "Medium"], ["high", "High"]] as [val, label]}
|
||||
<div>
|
||||
<input type="radio" id="rsp-{val || 'default'}" name="value" value={JSON.stringify(val)}
|
||||
checked={(!config?.plotConfiguration_resamplePolicy && val === "") || config?.plotConfiguration_resamplePolicy === val} />
|
||||
<label for="rsp-{val || 'default'}">{label}</label>
|
||||
</div>
|
||||
{/each}
|
||||
<div id="resamplePolicyHelp" class="form-text">
|
||||
Controls how many data points are shown in metric plots. Low = fast overview (~200 points), Medium = balanced (~500), High = maximum detail (~1000).
|
||||
</div>
|
||||
</div>
|
||||
<Button color="primary" type="submit">Submit</Button>
|
||||
</form>
|
||||
</Card>
|
||||
</Col>
|
||||
|
||||
<!-- RESAMPLE ALGORITHM -->
|
||||
<Col>
|
||||
<Card class="h-100">
|
||||
<form
|
||||
id="resample-algo-form"
|
||||
method="post"
|
||||
action="/frontend/configuration/"
|
||||
class="card-body"
|
||||
onsubmit={(e) => updateSetting(e, {
|
||||
selector: "#resample-algo-form",
|
||||
target: "rsa",
|
||||
})}
|
||||
>
|
||||
<CardTitle
|
||||
style="margin-bottom: 1em; display: flex; align-items: center;"
|
||||
>
|
||||
<div>Resample Algorithm</div>
|
||||
{#if displayMessage && message.target == "rsa"}
|
||||
<div style="margin-left: auto; font-size: 0.9em;">
|
||||
<code style="color: {message.color};" out:fade>
|
||||
Update: {message.msg}
|
||||
</code>
|
||||
</div>
|
||||
{/if}
|
||||
</CardTitle>
|
||||
<input type="hidden" name="key" value="plotConfiguration_resampleAlgo" />
|
||||
<div class="mb-3">
|
||||
{#each [["", "Default"], ["lttb", "LTTB"], ["average", "Average"], ["simple", "Simple"]] as [val, label]}
|
||||
<div>
|
||||
<input type="radio" id="rsa-{val || 'default'}" name="value" value={JSON.stringify(val)}
|
||||
checked={(!config?.plotConfiguration_resampleAlgo && val === "") || config?.plotConfiguration_resampleAlgo === val} />
|
||||
<label for="rsa-{val || 'default'}">{label}</label>
|
||||
</div>
|
||||
{/each}
|
||||
<div id="resampleAlgoHelp" class="form-text">
|
||||
Algorithm used when downsampling time-series data. LTTB preserves visual shape, Average smooths data, Simple picks every Nth point.
|
||||
</div>
|
||||
</div>
|
||||
<Button color="primary" type="submit">Submit</Button>
|
||||
</form>
|
||||
</Card>
|
||||
</Col>
|
||||
</Row>
|
||||
@@ -73,9 +73,10 @@
|
||||
const subClusterTopology = $derived(getContext("getHardwareTopology")(cluster, subCluster));
|
||||
const metricConfig = $derived(getContext("getMetricConfig")(cluster, subCluster, metric));
|
||||
const usesMeanStatsSeries = $derived((statisticsSeries?.mean && statisticsSeries.mean.length != 0));
|
||||
const resampleTrigger = $derived(resampleConfig?.trigger ? Number(resampleConfig.trigger) : null);
|
||||
const resampleTrigger = $derived(resampleConfig?.trigger ? Number(resampleConfig.trigger) : (resampleConfig?.targetPoints ? Math.floor(resampleConfig.targetPoints / 4) : null));
|
||||
const resampleResolutions = $derived(resampleConfig?.resolutions ? [...resampleConfig.resolutions] : null);
|
||||
const resampleMinimum = $derived(resampleConfig?.resolutions ? Math.min(...resampleConfig.resolutions) : null);
|
||||
const resampleTargetPoints = $derived(resampleConfig?.targetPoints ? Number(resampleConfig.targetPoints) : null);
|
||||
const useStatsSeries = $derived(!!statisticsSeries); // Display Stats Series By Default if Exists
|
||||
const thresholds = $derived(findJobAggregationThresholds(
|
||||
subClusterTopology,
|
||||
@@ -515,24 +516,29 @@
|
||||
if (resampleConfig && !forNode && key === 'x') {
|
||||
const numX = (u.series[0].idxs[1] - u.series[0].idxs[0])
|
||||
if (numX <= resampleTrigger && timestep !== resampleMinimum) {
|
||||
/* Get closest zoom level; prevents multiple iterative zoom requests for big zoom-steps (e.g. 600 -> 300 -> 120 -> 60) */
|
||||
// Which resolution to theoretically request to achieve 30 or more visible data points:
|
||||
const target = (numX * timestep) / resampleTrigger
|
||||
// Which configured resolution actually matches the closest to theoretical target:
|
||||
const closest = resampleResolutions.reduce(function(prev, curr) {
|
||||
return (Math.abs(curr - target) < Math.abs(prev - target) ? curr : prev);
|
||||
});
|
||||
let newRes;
|
||||
if (resampleTargetPoints && !resampleResolutions) {
|
||||
// Policy-based: compute resolution dynamically from visible window
|
||||
const visibleDuration = (u.scales.x.max - u.scales.x.min);
|
||||
const nativeTimestep = metricConfig?.timestep || timestep;
|
||||
newRes = Math.ceil(visibleDuration / resampleTargetPoints / nativeTimestep) * nativeTimestep;
|
||||
if (newRes < nativeTimestep) newRes = nativeTimestep;
|
||||
} else if (resampleResolutions) {
|
||||
// Array-based: find closest configured resolution
|
||||
const target = (numX * timestep) / resampleTrigger;
|
||||
newRes = resampleResolutions.reduce(function(prev, curr) {
|
||||
return (Math.abs(curr - target) < Math.abs(prev - target) ? curr : prev);
|
||||
});
|
||||
}
|
||||
// Prevents non-required dispatches
|
||||
if (timestep !== closest) {
|
||||
// console.log('Dispatch: Zoom with Res from / to', timestep, closest)
|
||||
if (newRes && timestep !== newRes) {
|
||||
onZoom({
|
||||
newRes: closest,
|
||||
newRes: newRes,
|
||||
lastZoomState: u?.scales,
|
||||
lastThreshold: thresholds?.normal
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// console.log('Dispatch: Zoom Update States')
|
||||
onZoom({
|
||||
lastZoomState: u?.scales,
|
||||
lastThreshold: thresholds?.normal
|
||||
|
||||
@@ -72,6 +72,8 @@ type PlotConfiguration struct {
|
||||
PlotsPerRow int `json:"plots-per-row"`
|
||||
LineWidth int `json:"line-width"`
|
||||
ColorScheme []string `json:"color-scheme"`
|
||||
ResampleAlgo string `json:"resample-algo"`
|
||||
ResamplePolicy string `json:"resample-policy"`
|
||||
}
|
||||
|
||||
var UIDefaults = WebConfig{
|
||||
@@ -144,6 +146,8 @@ func Init(rawConfig json.RawMessage) error {
|
||||
UIDefaultsMap["plotConfiguration_plotsPerRow"] = UIDefaults.PlotConfiguration.PlotsPerRow
|
||||
UIDefaultsMap["plotConfiguration_lineWidth"] = UIDefaults.PlotConfiguration.LineWidth
|
||||
UIDefaultsMap["plotConfiguration_colorScheme"] = UIDefaults.PlotConfiguration.ColorScheme
|
||||
UIDefaultsMap["plotConfiguration_resampleAlgo"] = UIDefaults.PlotConfiguration.ResampleAlgo
|
||||
UIDefaultsMap["plotConfiguration_resamplePolicy"] = UIDefaults.PlotConfiguration.ResamplePolicy
|
||||
|
||||
for _, c := range UIDefaults.MetricConfig.Clusters {
|
||||
if c.JobListMetrics != nil {
|
||||
|
||||
Reference in New Issue
Block a user