diff --git a/api/schema.graphqls b/api/schema.graphqls index 24071752..e4e2b8ed 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -19,6 +19,7 @@ type Node { schedulerState: SchedulerState! healthState: MonitoringState! metaData: Any + healthData: Any } type NodeStates { @@ -328,6 +329,7 @@ type Query { ## Node Queries New node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! + nodesWithMeta(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodeStates(filter: [NodeFilter!]): [NodeStates!]! nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]! diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 3ee05383..fde95fd3 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -279,8 +279,6 @@ func initSubsystems() error { return fmt.Errorf("initializing archive: %w", err) } - // Note: metricstore.Init() is called later in runServer() with proper configuration - // Handle database re-initialization if flagReinitDB { if err := importer.InitDB(); err != nil { diff --git a/configs/config-demo.json b/configs/config-demo.json index c3042993..509c8f18 100644 --- a/configs/config-demo.json +++ b/configs/config-demo.json @@ -12,6 +12,13 @@ "max-age": "2000h" } }, + "metric-store-external": [ + { + "scope": "fritz", + "url": "http://0.0.0.0:8082", + "token": "eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzU3Nzg4NDQsImlhdCI6MTc2ODU3ODg0NCwicm9sZXMiOlsiYWRtaW4iLCJhcGkiXSwic3ViIjoiZGVtbyJ9._SDEW9WaUVXSBFmWqGhyIZXLoqoDU8F1hkfh4cXKIqF4yw7w50IUpfUBtwUFUOnoviFKoi563f6RAMC7XxeLDA" + } + ], "metric-store": { "checkpoints": { "interval": "12h" @@ -19,4 +26,4 @@ "retention-in-memory": "48h", "memory-cap": 100 } -} \ No newline at end of file +} diff --git a/configs/config.json b/configs/config.json index 584baed3..c2361a1c 100644 --- a/configs/config.json +++ b/configs/config.json @@ -8,6 +8,11 @@ "api-allowed-ips": ["*"], "short-running-jobs-duration": 300, "enable-job-taggers": true, + "nodestate-retention": { + "policy": "move", + "target-kind": "file", + "target-path": "./var/nodestate-archive" + }, "resampling": { "minimum-points": 600, "trigger": 180, @@ -92,4 +97,3 @@ }, "ui-file": "ui-config.json" } - diff --git a/go.mod b/go.mod index f9bf7e42..b790c0c8 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/ClusterCockpit/cc-backend -go 1.24.9 +go 1.25.0 tool ( github.com/99designs/gqlgen @@ -8,28 +8,28 @@ tool ( ) require ( - github.com/99designs/gqlgen v0.17.85 - github.com/ClusterCockpit/cc-lib/v2 v2.4.0 + github.com/99designs/gqlgen v0.17.86 + github.com/ClusterCockpit/cc-lib/v2 v2.5.1 github.com/Masterminds/squirrel v1.5.4 github.com/aws/aws-sdk-go-v2 v1.41.1 - github.com/aws/aws-sdk-go-v2/config v1.32.6 - github.com/aws/aws-sdk-go-v2/credentials v1.19.7 - github.com/aws/aws-sdk-go-v2/service/s3 v1.95.0 + github.com/aws/aws-sdk-go-v2/config v1.32.8 + github.com/aws/aws-sdk-go-v2/credentials v1.19.8 + github.com/aws/aws-sdk-go-v2/service/s3 v1.96.0 github.com/coreos/go-oidc/v3 v3.17.0 - github.com/expr-lang/expr v1.17.7 + github.com/expr-lang/expr v1.17.8 github.com/go-chi/chi/v5 v5.2.5 github.com/go-chi/cors v1.2.2 - github.com/go-co-op/gocron/v2 v2.19.0 + github.com/go-co-op/gocron/v2 v2.19.1 github.com/go-ldap/ldap/v3 v3.4.12 - github.com/golang-jwt/jwt/v5 v5.3.0 + github.com/golang-jwt/jwt/v5 v5.3.1 github.com/golang-migrate/migrate/v4 v4.19.1 - github.com/google/gops v0.3.28 + github.com/google/gops v0.3.29 github.com/gorilla/sessions v1.4.0 github.com/influxdata/line-protocol/v2 v2.2.1 github.com/jmoiron/sqlx v1.4.0 github.com/joho/godotenv v1.5.1 - github.com/linkedin/goavro/v2 v2.14.1 - github.com/mattn/go-sqlite3 v1.14.33 + github.com/linkedin/goavro/v2 v2.15.0 + github.com/mattn/go-sqlite3 v1.14.34 github.com/parquet-go/parquet-go v0.27.0 github.com/qustavo/sqlhooks/v2 v2.1.0 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 @@ -37,30 +37,30 @@ require ( github.com/swaggo/http-swagger v1.3.4 github.com/swaggo/swag v1.16.6 github.com/vektah/gqlparser/v2 v2.5.31 - golang.org/x/crypto v0.46.0 - golang.org/x/oauth2 v0.34.0 + golang.org/x/crypto v0.48.0 + golang.org/x/oauth2 v0.35.0 golang.org/x/time v0.14.0 ) require ( - github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect + github.com/Azure/go-ntlmssp v0.1.0 // indirect github.com/KyleBanks/depth v1.2.1 // indirect github.com/agnivade/levenshtein v1.2.1 // indirect - github.com/andybalholm/brotli v1.1.1 // indirect + github.com/andybalholm/brotli v1.2.0 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.17 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.8 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17 // indirect github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.14 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 // indirect github.com/aws/smithy-go v1.24.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect @@ -68,9 +68,9 @@ require ( github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect github.com/go-jose/go-jose/v4 v4.1.3 // indirect - github.com/go-openapi/jsonpointer v0.22.3 // indirect - github.com/go-openapi/jsonreference v0.21.3 // indirect - github.com/go-openapi/spec v0.22.1 // indirect + github.com/go-openapi/jsonpointer v0.22.4 // indirect + github.com/go-openapi/jsonreference v0.21.4 // indirect + github.com/go-openapi/spec v0.22.3 // indirect github.com/go-openapi/swag/conv v0.25.4 // indirect github.com/go-openapi/swag/jsonname v0.25.4 // indirect github.com/go-openapi/swag/jsonutils v0.25.4 // indirect @@ -78,9 +78,9 @@ require ( github.com/go-openapi/swag/stringutils v0.25.4 // indirect github.com/go-openapi/swag/typeutils v0.25.4 // indirect github.com/go-openapi/swag/yamlutils v0.25.4 // indirect - github.com/go-viper/mapstructure/v2 v2.4.0 // indirect - github.com/goccy/go-yaml v1.19.0 // indirect - github.com/golang/snappy v0.0.4 // indirect + github.com/go-viper/mapstructure/v2 v2.5.0 // indirect + github.com/goccy/go-yaml v1.19.2 // indirect + github.com/golang/snappy v1.0.0 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/securecookie v1.1.2 // indirect github.com/gorilla/websocket v1.5.3 // indirect @@ -88,23 +88,23 @@ require ( github.com/influxdata/influxdb-client-go/v2 v2.14.0 // indirect github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect github.com/jonboulle/clockwork v0.5.0 // indirect - github.com/klauspost/compress v1.18.2 // indirect + github.com/klauspost/compress v1.18.4 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect - github.com/nats-io/nats.go v1.47.0 // indirect - github.com/nats-io/nkeys v0.4.12 // indirect + github.com/nats-io/nats.go v1.48.0 // indirect + github.com/nats-io/nkeys v0.4.15 // indirect github.com/nats-io/nuid v1.0.1 // indirect - github.com/oapi-codegen/runtime v1.1.1 // indirect + github.com/oapi-codegen/runtime v1.1.2 // indirect github.com/parquet-go/bitpack v1.0.0 // indirect - github.com/parquet-go/jsonlite v1.0.0 // indirect - github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/parquet-go/jsonlite v1.4.0 // indirect + github.com/pierrec/lz4/v4 v4.1.25 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/common v0.67.4 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect + github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sosodev/duration v1.3.1 // indirect - github.com/stmcginnis/gofish v0.20.0 // indirect + github.com/stmcginnis/gofish v0.21.1 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/swaggo/files v1.0.1 // indirect github.com/twpayne/go-geom v1.6.1 // indirect @@ -113,13 +113,13 @@ require ( github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect - golang.org/x/mod v0.31.0 // indirect - golang.org/x/net v0.48.0 // indirect + golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a // indirect + golang.org/x/mod v0.33.0 // indirect + golang.org/x/net v0.50.0 // indirect golang.org/x/sync v0.19.0 // indirect - golang.org/x/sys v0.39.0 // indirect - golang.org/x/text v0.32.0 // indirect - golang.org/x/tools v0.40.0 // indirect + golang.org/x/sys v0.41.0 // indirect + golang.org/x/text v0.34.0 // indirect + golang.org/x/tools v0.42.0 // indirect google.golang.org/protobuf v1.36.11 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect sigs.k8s.io/yaml v1.6.0 // indirect diff --git a/go.sum b/go.sum index 509c659c..c319c6ba 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,11 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= -github.com/99designs/gqlgen v0.17.85 h1:EkGx3U2FDcxQm8YDLQSpXIAVmpDyZ3IcBMOJi2nH1S0= -github.com/99designs/gqlgen v0.17.85/go.mod h1:yvs8s0bkQlRfqg03YXr3eR4OQUowVhODT/tHzCXnbOU= -github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= -github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= -github.com/ClusterCockpit/cc-lib/v2 v2.4.0 h1:OnZlvqSatg7yCQ2NtSR7AddpUVSiuSMZ8scF1a7nfOk= -github.com/ClusterCockpit/cc-lib/v2 v2.4.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw= +github.com/99designs/gqlgen v0.17.86 h1:C8N3UTa5heXX6twl+b0AJyGkTwYL6dNmFrgZNLRcU6w= +github.com/99designs/gqlgen v0.17.86/go.mod h1:KTrPl+vHA1IUzNlh4EYkl7+tcErL3MgKnhHrBcV74Fw= +github.com/Azure/go-ntlmssp v0.1.0 h1:DjFo6YtWzNqNvQdrwEyr/e4nhU3vRiwenz5QX7sFz+A= +github.com/Azure/go-ntlmssp v0.1.0/go.mod h1:NYqdhxd/8aAct/s4qSYZEerdPuH1liG2/X9DiVTbhpk= +github.com/ClusterCockpit/cc-lib/v2 v2.5.1 h1:s6M9tyPDty+4zTdQGJYKpGJM9Nz7N6ITMdjPvNSLX5g= +github.com/ClusterCockpit/cc-lib/v2 v2.5.1/go.mod h1:DZ8OIHPUZJpWqErLITt0B8P6/Q7CBW2IQSQ5YiFFaG0= github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= @@ -27,8 +27,8 @@ github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e h1:4dAU9FXIyQktp github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= -github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= -github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ= +github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op h1:Ucf+QxEKMbPogRO5guBNe5cgd9uZgfoJLOYs8WWhtjM= @@ -41,10 +41,10 @@ github.com/aws/aws-sdk-go-v2 v1.41.1 h1:ABlyEARCDLN034NhxlRUSZr4l71mh+T5KAeGh6ce github.com/aws/aws-sdk-go-v2 v1.41.1/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 h1:489krEF9xIGkOaaX3CE/Be2uWjiXrkCH6gUX+bZA/BU= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4/go.mod h1:IOAPF6oT9KCsceNTvvYMNHy0+kMF8akOjeDvPENWxp4= -github.com/aws/aws-sdk-go-v2/config v1.32.6 h1:hFLBGUKjmLAekvi1evLi5hVvFQtSo3GYwi+Bx4lpJf8= -github.com/aws/aws-sdk-go-v2/config v1.32.6/go.mod h1:lcUL/gcd8WyjCrMnxez5OXkO3/rwcNmvfno62tnXNcI= -github.com/aws/aws-sdk-go-v2/credentials v1.19.7 h1:tHK47VqqtJxOymRrNtUXN5SP/zUTvZKeLx4tH6PGQc8= -github.com/aws/aws-sdk-go-v2/credentials v1.19.7/go.mod h1:qOZk8sPDrxhf+4Wf4oT2urYJrYt3RejHSzgAquYeppw= +github.com/aws/aws-sdk-go-v2/config v1.32.8 h1:iu+64gwDKEoKnyTQskSku72dAwggKI5sV6rNvgSMpMs= +github.com/aws/aws-sdk-go-v2/config v1.32.8/go.mod h1:MI2XvA+qDi3i9AJxX1E2fu730syEBzp/jnXrjxuHwgI= +github.com/aws/aws-sdk-go-v2/credentials v1.19.8 h1:Jp2JYH1lRT3KhX4mshHPvVYsR5qqRec3hGvEarNYoR0= +github.com/aws/aws-sdk-go-v2/credentials v1.19.8/go.mod h1:fZG9tuvyVfxknv1rKibIz3DobRaFw1Poe8IKtXB3XYY= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 h1:I0GyV8wiYrP8XpA70g1HBcQO1JlQxCMTW9npl5UbDHY= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17/go.mod h1:tyw7BOl5bBe/oqvoIeECFJjMdzXoa/dfVz3QQ5lgHGA= github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 h1:xOLELNKGp2vsiteLsvLPwxC+mYmO6OZ8PYgiuPJzF8U= @@ -53,24 +53,24 @@ github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 h1:WWLqlh79iO48yLkj1v github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17/go.mod h1:EhG22vHRrvF8oXSTYStZhJc1aUgKtnJe+aOiFEV90cM= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 h1:CjMzUs78RDDv4ROu3JnJn/Ig1r6ZD7/T2DXLLRpejic= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16/go.mod h1:uVW4OLBqbJXSHJYA9svT9BluSvvwbzLQ2Crf6UPzR3c= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.17 h1:JqcdRG//czea7Ppjb+g/n4o8i/R50aTBHkA7vu0lK+k= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.17/go.mod h1:CO+WeGmIdj/MlPel2KwID9Gt7CNq4M65HUfBW97liM0= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4/go.mod h1:HQ4qwNZh32C3CBeO6iJLQlgtMzqeG17ziAA/3KDJFow= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 h1:DIBqIrJ7hv+e4CmIk2z3pyKT+3B6qVMgRsawHiR3qso= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7/go.mod h1:vLm00xmBke75UmpNvOcZQ/Q30ZFjbczeLFqGx5urmGo= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.8 h1:Z5EiPIzXKewUQK0QTMkutjiaPVeVYXX7KIqhXu/0fXs= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.8/go.mod h1:FsTpJtvC4U1fyDXk7c71XoDv3HlRm8V3NiYLeYLh5YE= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 h1:RuNSMoozM8oXlgLG/n6WLaFGoea7/CddrCfIiSA+xdY= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17/go.mod h1:F2xxQ9TZz5gDWsclCtPQscGpP0VUOc8RqgFM3vDENmU= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 h1:NSbvS17MlI2lurYgXnCOLvCFX38sBW4eiVER7+kkgsU= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16/go.mod h1:SwT8Tmqd4sA6G1qaGdzWCJN99bUmPGHfRwwq3G5Qb+A= -github.com/aws/aws-sdk-go-v2/service/s3 v1.95.0 h1:MIWra+MSq53CFaXXAywB2qg9YvVZifkk6vEGl/1Qor0= -github.com/aws/aws-sdk-go-v2/service/s3 v1.95.0/go.mod h1:79S2BdqCJpScXZA2y+cpZuocWsjGjJINyXnOsf5DTz8= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17 h1:bGeHBsGZx0Dvu/eJC0Lh9adJa3M1xREcndxLNZlve2U= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17/go.mod h1:dcW24lbU0CzHusTE8LLHhRLI42ejmINN8Lcr22bwh/g= +github.com/aws/aws-sdk-go-v2/service/s3 v1.96.0 h1:oeu8VPlOre74lBA/PMhxa5vewaMIMmILM+RraSyB8KA= +github.com/aws/aws-sdk-go-v2/service/s3 v1.96.0/go.mod h1:5jggDlZ2CLQhwJBiZJb4vfk4f0GxWdEDruWKEJ1xOdo= github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 h1:VrhDvQib/i0lxvr3zqlUwLwJP4fpmpyD9wYG1vfSu+Y= github.com/aws/aws-sdk-go-v2/service/signin v1.0.5/go.mod h1:k029+U8SY30/3/ras4G/Fnv/b88N4mAfliNn08Dem4M= github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 h1:v6EiMvhEYBoHABfbGB4alOYmCIrcgyPPiBE1wZAEbqk= github.com/aws/aws-sdk-go-v2/service/sso v1.30.9/go.mod h1:yifAsgBxgJWn3ggx70A3urX2AN49Y5sJTD1UQFlfqBw= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 h1:gd84Omyu9JLriJVCbGApcLzVR3XtmC4ZDPcAI6Ftvds= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13/go.mod h1:sTGThjphYE4Ohw8vJiRStAcu3rbjtXRsdNB0TvZ5wwo= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.14 h1:0jbJeuEHlwKJ9PfXtpSFc4MF+WIWORdhN1n30ITZGFM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.14/go.mod h1:sTGThjphYE4Ohw8vJiRStAcu3rbjtXRsdNB0TvZ5wwo= github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 h1:5fFjR/ToSOzB2OQ/XqWpZBmNvmP/pJ1jOWYlFDJTjRQ= github.com/aws/aws-sdk-go-v2/service/sts v1.41.6/go.mod h1:qgFDZQSD/Kys7nJnVqYlWKnh0SSdMjAi0uSwON4wgYQ= github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk= @@ -91,8 +91,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= -github.com/expr-lang/expr v1.17.7 h1:Q0xY/e/2aCIp8g9s/LGvMDCC5PxYlvHgDZRQ4y16JX8= -github.com/expr-lang/expr v1.17.7/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= +github.com/expr-lang/expr v1.17.8 h1:W1loDTT+0PQf5YteHSTpju2qfUfNoBt4yw9+wOEU9VM= +github.com/expr-lang/expr v1.17.8/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk= @@ -105,18 +105,18 @@ github.com/go-chi/chi/v5 v5.2.5 h1:Eg4myHZBjyvJmAFjFvWgrqDTXFyOzjj7YIm3L3mu6Ug= github.com/go-chi/chi/v5 v5.2.5/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0= github.com/go-chi/cors v1.2.2 h1:Jmey33TE+b+rB7fT8MUy1u0I4L+NARQlK6LhzKPSyQE= github.com/go-chi/cors v1.2.2/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn836hqM7JxpglAy2Vzc58= -github.com/go-co-op/gocron/v2 v2.19.0 h1:OKf2y6LXPs/BgBI2fl8PxUpNAI1DA9Mg+hSeGOS38OU= -github.com/go-co-op/gocron/v2 v2.19.0/go.mod h1:5lEiCKk1oVJV39Zg7/YG10OnaVrDAV5GGR6O0663k6U= +github.com/go-co-op/gocron/v2 v2.19.1 h1:B4iLeA0NB/2iO3EKQ7NfKn5KsQgZfjb2fkvoZJU3yBI= +github.com/go-co-op/gocron/v2 v2.19.1/go.mod h1:5lEiCKk1oVJV39Zg7/YG10OnaVrDAV5GGR6O0663k6U= github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-ldap/ldap/v3 v3.4.12 h1:1b81mv7MagXZ7+1r7cLTWmyuTqVqdwbtJSjC0DAp9s4= github.com/go-ldap/ldap/v3 v3.4.12/go.mod h1:+SPAGcTtOfmGsCb3h1RFiq4xpp4N636G75OEace8lNo= -github.com/go-openapi/jsonpointer v0.22.3 h1:dKMwfV4fmt6Ah90zloTbUKWMD+0he+12XYAsPotrkn8= -github.com/go-openapi/jsonpointer v0.22.3/go.mod h1:0lBbqeRsQ5lIanv3LHZBrmRGHLHcQoOXQnf88fHlGWo= -github.com/go-openapi/jsonreference v0.21.3 h1:96Dn+MRPa0nYAR8DR1E03SblB5FJvh7W6krPI0Z7qMc= -github.com/go-openapi/jsonreference v0.21.3/go.mod h1:RqkUP0MrLf37HqxZxrIAtTWW4ZJIK1VzduhXYBEeGc4= -github.com/go-openapi/spec v0.22.1 h1:beZMa5AVQzRspNjvhe5aG1/XyBSMeX1eEOs7dMoXh/k= -github.com/go-openapi/spec v0.22.1/go.mod h1:c7aeIQT175dVowfp7FeCvXXnjN/MrpaONStibD2WtDA= +github.com/go-openapi/jsonpointer v0.22.4 h1:dZtK82WlNpVLDW2jlA1YCiVJFVqkED1MegOUy9kR5T4= +github.com/go-openapi/jsonpointer v0.22.4/go.mod h1:elX9+UgznpFhgBuaMQ7iu4lvvX1nvNsesQ3oxmYTw80= +github.com/go-openapi/jsonreference v0.21.4 h1:24qaE2y9bx/q3uRK/qN+TDwbok1NhbSmGjjySRCHtC8= +github.com/go-openapi/jsonreference v0.21.4/go.mod h1:rIENPTjDbLpzQmQWCj5kKj3ZlmEh+EFVbz3RTUh30/4= +github.com/go-openapi/spec v0.22.3 h1:qRSmj6Smz2rEBxMnLRBMeBWxbbOvuOoElvSvObIgwQc= +github.com/go-openapi/spec v0.22.3/go.mod h1:iIImLODL2loCh3Vnox8TY2YWYJZjMAKYyLH2Mu8lOZs= github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= github.com/go-openapi/swag/conv v0.25.4 h1:/Dd7p0LZXczgUcC/Ikm1+YqVzkEeCc9LnOWjfkpkfe4= github.com/go-openapi/swag/conv v0.25.4/go.mod h1:3LXfie/lwoAv0NHoEuY1hjoFAYkvlqI/Bn5EQDD3PPU= @@ -141,17 +141,17 @@ github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16p github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= -github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= -github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= -github.com/goccy/go-yaml v1.19.0 h1:EmkZ9RIsX+Uq4DYFowegAuJo8+xdX3T/2dwNPXbxEYE= -github.com/goccy/go-yaml v1.19.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= -github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= -github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/go-viper/mapstructure/v2 v2.5.0 h1:vM5IJoUAy3d7zRSVtIwQgBj7BiWtMPfmPEgAXnvj1Ro= +github.com/go-viper/mapstructure/v2 v2.5.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM= +github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= +github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA= github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= -github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= +github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= @@ -160,8 +160,8 @@ github.com/google/go-tpm v0.9.7 h1:u89J4tUUeDTlH8xxC3CTW7OHZjbjKoHdQ9W7gCUhtxA= github.com/google/go-tpm v0.9.7/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u6bVUY= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/gops v0.3.28 h1:2Xr57tqKAmQYRAfG12E+yLcoa2Y42UJo2lOrUFL9ark= -github.com/google/gops v0.3.28/go.mod h1:6f6+Nl8LcHrzJwi8+p0ii+vmBFSlB4f8cOOkTJ7sk4c= +github.com/google/gops v0.3.29 h1:n98J2qSOK1NJvRjdLDcjgDryjpIBGhbaqph1mXKL0rY= +github.com/google/gops v0.3.29/go.mod h1:8N3jZftuPazvUwtYY/ncG4iPrjp15ysNKLfq+QQPiwc= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= @@ -208,8 +208,8 @@ github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwA github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I= github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= -github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= -github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= +github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c= +github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -224,12 +224,12 @@ github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6Fm github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= -github.com/linkedin/goavro/v2 v2.14.1 h1:/8VjDpd38PRsy02JS0jflAu7JZPfJcGTwqWgMkFS2iI= -github.com/linkedin/goavro/v2 v2.14.1/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk= +github.com/linkedin/goavro/v2 v2.15.0 h1:pDj1UrjUOO62iXhgBiE7jQkpNIc5/tA5eZsgolMjgVI= +github.com/linkedin/goavro/v2 v2.15.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk= github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/mattn/go-sqlite3 v1.14.33 h1:A5blZ5ulQo2AtayQ9/limgHEkFreKj1Dv226a1K73s0= -github.com/mattn/go-sqlite3 v1.14.33/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk= +github.com/mattn/go-sqlite3 v1.14.34/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 h1:KGuD/pM2JpL9FAYvBrnBBeENKZNh6eNtjqytV6TYjnk= github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= @@ -238,24 +238,24 @@ github.com/nats-io/jwt/v2 v2.8.0 h1:K7uzyz50+yGZDO5o772eRE7atlcSEENpL7P+b74JV1g= github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA= github.com/nats-io/nats-server/v2 v2.12.3 h1:KRv+1n7lddMVgkJPQer+pt36TcO0ENxjilBmeWdjcHs= github.com/nats-io/nats-server/v2 v2.12.3/go.mod h1:MQXjG9WjyXKz9koWzUc3jYUMKD8x3CLmTNy91IQQz3Y= -github.com/nats-io/nats.go v1.47.0 h1:YQdADw6J/UfGUd2Oy6tn4Hq6YHxCaJrVKayxxFqYrgM= -github.com/nats-io/nats.go v1.47.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= -github.com/nats-io/nkeys v0.4.12 h1:nssm7JKOG9/x4J8II47VWCL1Ds29avyiQDRn0ckMvDc= -github.com/nats-io/nkeys v0.4.12/go.mod h1:MT59A1HYcjIcyQDJStTfaOY6vhy9XTUjOFo+SVsvpBg= +github.com/nats-io/nats.go v1.48.0 h1:pSFyXApG+yWU/TgbKCjmm5K4wrHu86231/w84qRVR+U= +github.com/nats-io/nats.go v1.48.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= +github.com/nats-io/nkeys v0.4.15 h1:JACV5jRVO9V856KOapQ7x+EY8Jo3qw1vJt/9Jpwzkk4= +github.com/nats-io/nkeys v0.4.15/go.mod h1:CpMchTXC9fxA5zrMo4KpySxNjiDVvr8ANOSZdiNfUrs= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= -github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= +github.com/oapi-codegen/runtime v1.1.2 h1:P2+CubHq8fO4Q6fV1tqDBZHCwpVpvPg7oKiYzQgXIyI= +github.com/oapi-codegen/runtime v1.1.2/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/parquet-go/bitpack v1.0.0 h1:AUqzlKzPPXf2bCdjfj4sTeacrUwsT7NlcYDMUQxPcQA= github.com/parquet-go/bitpack v1.0.0/go.mod h1:XnVk9TH+O40eOOmvpAVZ7K2ocQFrQwysLMnc6M/8lgs= -github.com/parquet-go/jsonlite v1.0.0 h1:87QNdi56wOfsE5bdgas0vRzHPxfJgzrXGml1zZdd7VU= -github.com/parquet-go/jsonlite v1.0.0/go.mod h1:nDjpkpL4EOtqs6NQugUsi0Rleq9sW/OtC1NnZEnxzF0= +github.com/parquet-go/jsonlite v1.4.0 h1:RTG7prqfO0HD5egejU8MUDBN8oToMj55cgSV1I0zNW4= +github.com/parquet-go/jsonlite v1.4.0/go.mod h1:nDjpkpL4EOtqs6NQugUsi0Rleq9sW/OtC1NnZEnxzF0= github.com/parquet-go/parquet-go v0.27.0 h1:vHWK2xaHbj+v1DYps03yDRpEsdtOeKbhiXUaixoPb3g= github.com/parquet-go/parquet-go v0.27.0/go.mod h1:navtkAYr2LGoJVp141oXPlO/sxLvaOe3la2JEoD8+rg= -github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= -github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.25 h1:kocOqRffaIbU5djlIBr7Wh+cx82C0vtFb0fOurZHqD0= +github.com/pierrec/lz4/v4 v4.1.25/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= @@ -264,17 +264,17 @@ github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.67.4 h1:yR3NqWO1/UyO1w2PhUvXlGQs/PtFmoveVO0KZ4+Lvsc= -github.com/prometheus/common v0.67.4/go.mod h1:gP0fq6YjjNCLssJCQp0yk4M8W6ikLURwkdd/YKtTbyI= -github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= -github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= +github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= +github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws= +github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw= github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0= github.com/qustavo/sqlhooks/v2 v2.1.0/go.mod h1:aMREyKo7fOKTwiLuWPsaHRXEmtqG4yREztO0idF83AU= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= -github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= -github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4= @@ -284,8 +284,8 @@ github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NF github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4= github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg= github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0= -github.com/stmcginnis/gofish v0.20.0 h1:hH2V2Qe898F2wWT1loApnkDUrXXiLKqbSlMaH3Y1n08= -github.com/stmcginnis/gofish v0.20.0/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU= +github.com/stmcginnis/gofish v0.21.1 h1:sutDvBhmLh4RDOZ1DN8GUyYRu7f1ggvKMMnSaiqhwn4= +github.com/stmcginnis/gofish v0.21.1/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= @@ -324,21 +324,21 @@ go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= -golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= -golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o= -golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8= +golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts= +golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= +golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a h1:ovFr6Z0MNmU7nH8VaX5xqw+05ST2uO1exVfZPVqRC5o= +golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a/go.mod h1:K79w1Vqn7PoiZn+TkNpx3BUWUQksGO3JcVX6qIjytmA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= -golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= +golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= -golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= -golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= -golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60= +golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= @@ -349,8 +349,8 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= -golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -358,15 +358,15 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= -golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= +golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= -golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= +golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= +golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= diff --git a/init/clustercockpit.service b/init/clustercockpit.service index b4ed8bfa..3c977e34 100644 --- a/init/clustercockpit.service +++ b/init/clustercockpit.service @@ -12,7 +12,7 @@ NotifyAccess=all Restart=on-failure RestartSec=30 TimeoutStopSec=100 -ExecStart=/opt/monitoring/cc-backend/cc-backend --config ./config.json +ExecStart=/opt/monitoring/cc-backend/cc-backend --config ./config.json --server [Install] WantedBy=multi-user.target diff --git a/internal/api/job.go b/internal/api/job.go index c4a81cf2..1322225b 100644 --- a/internal/api/job.go +++ b/internal/api/job.go @@ -697,7 +697,15 @@ func (api *RestAPI) startJob(rw http.ResponseWriter, r *http.Request) { } } - id, err := api.JobRepository.Start(&req) + // When tags are present, insert directly into the job table so that the + // returned ID can be used with AddTagOrCreate (which queries the job table). + // Jobs without tags use the cache path as before. + var id int64 + if len(req.Tags) > 0 { + id, err = api.JobRepository.StartDirect(&req) + } else { + id, err = api.JobRepository.Start(&req) + } if err != nil { handleError(fmt.Errorf("insert into database failed: %w", err), http.StatusInternalServerError, rw) return diff --git a/internal/api/log.go b/internal/api/log.go new file mode 100644 index 00000000..90add9bb --- /dev/null +++ b/internal/api/log.go @@ -0,0 +1,165 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. This file is part of cc-backend. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. +package api + +import ( + "bufio" + "encoding/json" + "fmt" + "net/http" + "os/exec" + "regexp" + "strconv" + "strings" + + "github.com/ClusterCockpit/cc-backend/internal/config" + "github.com/ClusterCockpit/cc-backend/internal/repository" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" +) + +type LogEntry struct { + Timestamp string `json:"timestamp"` + Priority int `json:"priority"` + Message string `json:"message"` + Unit string `json:"unit"` +} + +var safePattern = regexp.MustCompile(`^[a-zA-Z0-9 :\-\.]+$`) + +func (api *RestAPI) getJournalLog(rw http.ResponseWriter, r *http.Request) { + user := repository.GetUserFromContext(r.Context()) + if !user.HasRole(schema.RoleAdmin) { + handleError(fmt.Errorf("only admins are allowed to view logs"), http.StatusForbidden, rw) + return + } + + since := r.URL.Query().Get("since") + if since == "" { + since = "1 hour ago" + } + if !safePattern.MatchString(since) { + handleError(fmt.Errorf("invalid 'since' parameter"), http.StatusBadRequest, rw) + return + } + + lines := 200 + if l := r.URL.Query().Get("lines"); l != "" { + n, err := strconv.Atoi(l) + if err != nil || n < 1 { + handleError(fmt.Errorf("invalid 'lines' parameter"), http.StatusBadRequest, rw) + return + } + if n > 1000 { + n = 1000 + } + lines = n + } + + unit := config.Keys.SystemdUnit + if unit == "" { + unit = "clustercockpit.service" + } + + args := []string{ + "--output=json", + "--no-pager", + "-n", fmt.Sprintf("%d", lines), + "--since", since, + "-u", unit, + } + + if level := r.URL.Query().Get("level"); level != "" { + n, err := strconv.Atoi(level) + if err != nil || n < 0 || n > 7 { + handleError(fmt.Errorf("invalid 'level' parameter (must be 0-7)"), http.StatusBadRequest, rw) + return + } + args = append(args, "--priority", fmt.Sprintf("%d", n)) + } + + if search := r.URL.Query().Get("search"); search != "" { + if !safePattern.MatchString(search) { + handleError(fmt.Errorf("invalid 'search' parameter"), http.StatusBadRequest, rw) + return + } + args = append(args, "--grep", search) + } + + cclog.Debugf("calling journalctl with %s", strings.Join(args, " ")) + cmd := exec.CommandContext(r.Context(), "journalctl", args...) + stdout, err := cmd.StdoutPipe() + if err != nil { + handleError(fmt.Errorf("failed to create pipe: %w", err), http.StatusInternalServerError, rw) + return + } + + if err := cmd.Start(); err != nil { + handleError(fmt.Errorf("failed to start journalctl: %w", err), http.StatusInternalServerError, rw) + return + } + + entries := make([]LogEntry, 0, lines) + scanner := bufio.NewScanner(stdout) + for scanner.Scan() { + var raw map[string]any + if err := json.Unmarshal(scanner.Bytes(), &raw); err != nil { + cclog.Debugf("error unmarshal log output: %v", err) + continue + } + + priority := 6 // default info + if p, ok := raw["PRIORITY"]; ok { + switch v := p.(type) { + case string: + if n, err := strconv.Atoi(v); err == nil { + priority = n + } + case float64: + priority = int(v) + } + } + + msg := "" + if m, ok := raw["MESSAGE"]; ok { + if s, ok := m.(string); ok { + msg = s + } + } + + ts := "" + if t, ok := raw["__REALTIME_TIMESTAMP"]; ok { + if s, ok := t.(string); ok { + ts = s + } + } + + unitName := "" + if u, ok := raw["_SYSTEMD_UNIT"]; ok { + if s, ok := u.(string); ok { + unitName = s + } + } + + entries = append(entries, LogEntry{ + Timestamp: ts, + Priority: priority, + Message: msg, + Unit: unitName, + }) + } + + if err := cmd.Wait(); err != nil { + // journalctl returns exit code 1 when --grep matches nothing + if len(entries) == 0 { + cclog.Debugf("journalctl exited with: %v", err) + } + } + + rw.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(rw).Encode(entries); err != nil { + cclog.Errorf("Failed to encode log entries: %v", err) + } +} diff --git a/internal/api/nats.go b/internal/api/nats.go index f1684f20..02a03fae 100644 --- a/internal/api/nats.go +++ b/internal/api/nats.go @@ -211,7 +211,14 @@ func (api *NatsAPI) handleStartJob(payload string) { } } - id, err := api.JobRepository.Start(&req) + // When tags are present, insert directly into the job table so that the + // returned ID can be used with AddTagOrCreate (which queries the job table). + var id int64 + if len(req.Tags) > 0 { + id, err = api.JobRepository.StartDirect(&req) + } else { + id, err = api.JobRepository.Start(&req) + } if err != nil { cclog.Errorf("NATS start job: insert into database failed: %v", err) return diff --git a/internal/api/node.go b/internal/api/node.go index e6b19479..5032ed7b 100644 --- a/internal/api/node.go +++ b/internal/api/node.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "github.com/ClusterCockpit/cc-backend/internal/metricdispatch" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/metricstore" @@ -77,25 +78,37 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) { } requestReceived := time.Now().Unix() repo := repository.GetNodeRepository() - ms := metricstore.GetMemoryStore() m := make(map[string][]string) + metricNames := make(map[string][]string) healthResults := make(map[string]metricstore.HealthCheckResult) startMs := time.Now() + // Step 1: Build nodeList and metricList per subcluster for _, node := range req.Nodes { if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil { m[sc] = append(m[sc], node.Hostname) } } - for sc, nl := range m { + for sc := range m { if sc != "" { metricList := archive.GetMetricConfigSubCluster(req.Cluster, sc) - metricNames := metricListToNames(metricList) - if results, err := ms.HealthCheck(req.Cluster, nl, metricNames); err == nil { - maps.Copy(healthResults, results) + metricNames[sc] = metricListToNames(metricList) + } + } + + // Step 2: Determine which metric store to query and perform health check + healthRepo, err := metricdispatch.GetHealthCheckRepo(req.Cluster) + if err != nil { + cclog.Warnf("updateNodeStates: no metric store for cluster %s, skipping health check: %v", req.Cluster, err) + } else { + for sc, nl := range m { + if sc != "" { + if results, err := healthRepo.HealthCheck(req.Cluster, nl, metricNames[sc]); err == nil { + maps.Copy(healthResults, results) + } } } } diff --git a/internal/api/rest.go b/internal/api/rest.go index 575b1809..fe722511 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -158,6 +158,7 @@ func (api *RestAPI) MountConfigAPIRoutes(r chi.Router) { // MountFrontendAPIRoutes registers frontend-specific API endpoints. // These routes support JWT generation and user configuration updates with session authentication. func (api *RestAPI) MountFrontendAPIRoutes(r chi.Router) { + r.Get("/logs/", api.getJournalLog) // Settings Frontend Uses SessionAuth if api.Authentication != nil { r.Get("/jwt/", api.getJWT) diff --git a/internal/auth/auth_test.go b/internal/auth/auth_test.go index 68961354..f8c6635c 100644 --- a/internal/auth/auth_test.go +++ b/internal/auth/auth_test.go @@ -49,7 +49,7 @@ func TestRateLimiterBehavior(t *testing.T) { limiter := getIPUserLimiter(ip, username) // Should allow first 5 attempts - for i := 0; i < 5; i++ { + for i := range 5 { if !limiter.Allow() { t.Errorf("Request %d should be allowed within rate limit", i+1) } diff --git a/internal/auth/jwtHelpers.go b/internal/auth/jwtHelpers.go index de59145e..8321b9c4 100644 --- a/internal/auth/jwtHelpers.go +++ b/internal/auth/jwtHelpers.go @@ -9,6 +9,7 @@ import ( "database/sql" "errors" "fmt" + "strings" "github.com/ClusterCockpit/cc-backend/internal/repository" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" @@ -80,11 +81,12 @@ func extractNameFromClaims(claims jwt.MapClaims) string { return "" } - name := fmt.Sprintf("%v", vals[0]) + var name strings.Builder + name.WriteString(fmt.Sprintf("%v", vals[0])) for i := 1; i < len(vals); i++ { - name += fmt.Sprintf(" %v", vals[i]) + name.WriteString(fmt.Sprintf(" %v", vals[i])) } - return name + return name.String() } } diff --git a/internal/config/config.go b/internal/config/config.go index 2e601ed7..f635b7e4 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -72,14 +72,17 @@ type ProgramConfig struct { // If exists, will enable dynamic zoom in frontend metric plots using the configured values EnableResampling *ResampleConfig `json:"resampling"` + // Systemd unit name for log viewer (default: "clustercockpit") + SystemdUnit string `json:"systemd-unit"` + // Node state retention configuration NodeStateRetention *NodeStateRetention `json:"nodestate-retention"` } type NodeStateRetention struct { - Policy string `json:"policy"` // "delete" or "parquet" - Age int `json:"age"` // hours, default 24 - TargetKind string `json:"target-kind"` // "file" or "s3" + Policy string `json:"policy"` // "delete" or "move" + Age int `json:"age"` // hours, default 24 + TargetKind string `json:"target-kind"` // "file" or "s3" TargetPath string `json:"target-path"` TargetEndpoint string `json:"target-endpoint"` TargetBucket string `json:"target-bucket"` diff --git a/internal/config/schema.go b/internal/config/schema.go index bd1b314e..5e02732d 100644 --- a/internal/config/schema.go +++ b/internal/config/schema.go @@ -77,24 +77,18 @@ var configSchema = ` "type": "integer" }, "emission-constant": { - "description": ".", + "description": "Energy mix CO2 emission constant [g/kWh]. If set, displays estimated CO2 emission for jobs.", "type": "integer" }, - "cron-frequency": { - "description": "Frequency of cron job workers.", - "type": "object", - "properties": { - "duration-worker": { - "description": "Duration Update Worker [Defaults to '5m']", - "type": "string" - }, - "footprint-worker": { - "description": "Metric-Footprint Update Worker [Defaults to '10m']", - "type": "string" - } - } + "machine-state-dir": { + "description": "Where to store MachineState files.", + "type": "string" }, - "enable-resampling": { + "systemd-unit": { + "description": "Systemd unit name for log viewer (default: 'clustercockpit').", + "type": "string" + }, + "resampling": { "description": "Enable dynamic zoom in frontend metric plots.", "type": "object", "properties": { @@ -136,9 +130,9 @@ var configSchema = ` "type": "object", "properties": { "policy": { - "description": "Retention policy: 'delete' to remove old rows, 'parquet' to archive then delete.", + "description": "Retention policy: 'delete' to remove old rows, 'move' to archive to Parquet then delete.", "type": "string", - "enum": ["delete", "parquet"] + "enum": ["delete", "move"] }, "age": { "description": "Retention age in hours (default: 24).", diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index 965fd860..136a123b 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -287,6 +287,7 @@ type ComplexityRoot struct { Cluster func(childComplexity int) int CpusAllocated func(childComplexity int) int GpusAllocated func(childComplexity int) int + HealthData func(childComplexity int) int HealthState func(childComplexity int) int Hostname func(childComplexity int) int ID func(childComplexity int) int @@ -347,6 +348,7 @@ type ComplexityRoot struct { NodeStates func(childComplexity int, filter []*model.NodeFilter) int NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter, typeArg string) int Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int + NodesWithMeta func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int Tags func(childComplexity int) int @@ -369,7 +371,7 @@ type ComplexityRoot struct { Series struct { Data func(childComplexity int) int Hostname func(childComplexity int) int - Id func(childComplexity int) int + ID func(childComplexity int) int Statistics func(childComplexity int) int } @@ -476,6 +478,7 @@ type NodeResolver interface { SchedulerState(ctx context.Context, obj *schema.Node) (schema.SchedulerState, error) HealthState(ctx context.Context, obj *schema.Node) (string, error) MetaData(ctx context.Context, obj *schema.Node) (any, error) + HealthData(ctx context.Context, obj *schema.Node) (any, error) } type QueryResolver interface { Clusters(ctx context.Context) ([]*schema.Cluster, error) @@ -485,6 +488,7 @@ type QueryResolver interface { AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) Node(ctx context.Context, id string) (*schema.Node, error) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) + NodesWithMeta(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error) Job(ctx context.Context, id string) (*schema.Job, error) @@ -1452,6 +1456,12 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin } return e.complexity.Node.GpusAllocated(childComplexity), true + case "Node.healthData": + if e.complexity.Node.HealthData == nil { + break + } + + return e.complexity.Node.HealthData(childComplexity), true case "Node.healthState": if e.complexity.Node.HealthState == nil { break @@ -1785,6 +1795,17 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin } return e.complexity.Query.Nodes(childComplexity, args["filter"].([]*model.NodeFilter), args["order"].(*model.OrderByInput)), true + case "Query.nodesWithMeta": + if e.complexity.Query.NodesWithMeta == nil { + break + } + + args, err := ec.field_Query_nodesWithMeta_args(ctx, rawArgs) + if err != nil { + return 0, false + } + + return e.complexity.Query.NodesWithMeta(childComplexity, args["filter"].([]*model.NodeFilter), args["order"].(*model.OrderByInput)), true case "Query.rooflineHeatmap": if e.complexity.Query.RooflineHeatmap == nil { break @@ -1882,11 +1903,11 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin return e.complexity.Series.Hostname(childComplexity), true case "Series.id": - if e.complexity.Series.Id == nil { + if e.complexity.Series.ID == nil { break } - return e.complexity.Series.Id(childComplexity), true + return e.complexity.Series.ID(childComplexity), true case "Series.statistics": if e.complexity.Series.Statistics == nil { break @@ -2302,6 +2323,7 @@ type Node { schedulerState: SchedulerState! healthState: MonitoringState! metaData: Any + healthData: Any } type NodeStates { @@ -2611,6 +2633,7 @@ type Query { ## Node Queries New node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! + nodesWithMeta(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodeStates(filter: [NodeFilter!]): [NodeStates!]! nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]! @@ -3268,6 +3291,22 @@ func (ec *executionContext) field_Query_node_args(ctx context.Context, rawArgs m return args, nil } +func (ec *executionContext) field_Query_nodesWithMeta_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { + var err error + args := map[string]any{} + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeFilterᚄ) + if err != nil { + return nil, err + } + args["filter"] = arg0 + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "order", ec.unmarshalOOrderByInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐOrderByInput) + if err != nil { + return nil, err + } + args["order"] = arg1 + return args, nil +} + func (ec *executionContext) field_Query_nodes_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} @@ -8258,6 +8297,35 @@ func (ec *executionContext) fieldContext_Node_metaData(_ context.Context, field return fc, nil } +func (ec *executionContext) _Node_healthData(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + return graphql.ResolveField( + ctx, + ec.OperationContext, + field, + ec.fieldContext_Node_healthData, + func(ctx context.Context) (any, error) { + return ec.resolvers.Node().HealthData(ctx, obj) + }, + nil, + ec.marshalOAny2interface, + true, + false, + ) +} + +func (ec *executionContext) fieldContext_Node_healthData(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: true, + IsResolver: true, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Any does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _NodeMetrics_host(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) { return graphql.ResolveField( ctx, @@ -8428,6 +8496,8 @@ func (ec *executionContext) fieldContext_NodeStateResultList_items(_ context.Con return ec.fieldContext_Node_healthState(ctx, field) case "metaData": return ec.fieldContext_Node_metaData(ctx, field) + case "healthData": + return ec.fieldContext_Node_healthData(ctx, field) } return nil, fmt.Errorf("no field named %q was found under type Node", field.Name) }, @@ -9053,6 +9123,8 @@ func (ec *executionContext) fieldContext_Query_node(ctx context.Context, field g return ec.fieldContext_Node_healthState(ctx, field) case "metaData": return ec.fieldContext_Node_metaData(ctx, field) + case "healthData": + return ec.fieldContext_Node_healthData(ctx, field) } return nil, fmt.Errorf("no field named %q was found under type Node", field.Name) }, @@ -9118,6 +9190,53 @@ func (ec *executionContext) fieldContext_Query_nodes(ctx context.Context, field return fc, nil } +func (ec *executionContext) _Query_nodesWithMeta(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { + return graphql.ResolveField( + ctx, + ec.OperationContext, + field, + ec.fieldContext_Query_nodesWithMeta, + func(ctx context.Context) (any, error) { + fc := graphql.GetFieldContext(ctx) + return ec.resolvers.Query().NodesWithMeta(ctx, fc.Args["filter"].([]*model.NodeFilter), fc.Args["order"].(*model.OrderByInput)) + }, + nil, + ec.marshalNNodeStateResultList2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStateResultList, + true, + true, + ) +} + +func (ec *executionContext) fieldContext_Query_nodesWithMeta(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Query", + Field: field, + IsMethod: true, + IsResolver: true, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + switch field.Name { + case "items": + return ec.fieldContext_NodeStateResultList_items(ctx, field) + case "count": + return ec.fieldContext_NodeStateResultList_count(ctx, field) + } + return nil, fmt.Errorf("no field named %q was found under type NodeStateResultList", field.Name) + }, + } + defer func() { + if r := recover(); r != nil { + err = ec.Recover(ctx, r) + ec.Error(ctx, err) + } + }() + ctx = graphql.WithFieldContext(ctx, fc) + if fc.Args, err = ec.field_Query_nodesWithMeta_args(ctx, field.ArgumentMap(ec.Variables)); err != nil { + ec.Error(ctx, err) + return fc, err + } + return fc, nil +} + func (ec *executionContext) _Query_nodeStates(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { return graphql.ResolveField( ctx, @@ -15744,6 +15863,39 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj continue } + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) + case "healthData": + field := field + + innerFunc := func(ctx context.Context, _ *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Node_healthData(ctx, field, obj) + return res + } + + if field.Deferrable != nil { + dfs, ok := deferred[field.Deferrable.Label] + di := 0 + if ok { + dfs.AddField(field) + di = len(dfs.Values) - 1 + } else { + dfs = graphql.NewFieldSet([]graphql.CollectedField{field}) + deferred[field.Deferrable.Label] = dfs + } + dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler { + return innerFunc(ctx, dfs) + }) + + // don't run the out.Concurrently() call below + out.Values[i] = graphql.Null + continue + } + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) default: panic("unknown field " + strconv.Quote(field.Name)) @@ -16171,6 +16323,28 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) } + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) + case "nodesWithMeta": + field := field + + innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Query_nodesWithMeta(ctx, field) + if res == graphql.Null { + atomic.AddUint32(&fs.Invalids, 1) + } + return res + } + + rrm := func(ctx context.Context) graphql.Marshaler { + return ec.OperationContext.RootResolverMiddleware(ctx, + func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) + } + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) case "nodeStates": field := field diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 059bd16d..0d56b02c 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -318,18 +318,39 @@ func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (sc if obj.NodeState != "" { return obj.NodeState, nil } else { - return "", fmt.Errorf("no SchedulerState (NodeState) on Object") + return "", fmt.Errorf("resolver: no SchedulerState (NodeState) on node object") } } // HealthState is the resolver for the healthState field. func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (string, error) { - panic(fmt.Errorf("not implemented: HealthState - healthState")) + if obj.HealthState != "" { + return string(obj.HealthState), nil + } else { + return "", fmt.Errorf("resolver: no HealthState (NodeState) on node object") + } } // MetaData is the resolver for the metaData field. func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) { - panic(fmt.Errorf("not implemented: MetaData - metaData")) + if obj.MetaData != nil { + return obj.MetaData, nil + } else { + cclog.Debug("resolver: no MetaData (NodeState) on node object") + emptyMeta := make(map[string]string, 0) + return emptyMeta, nil + } +} + +// HealthData is the resolver for the healthData field. +func (r *nodeResolver) HealthData(ctx context.Context, obj *schema.Node) (any, error) { + if obj.HealthData != nil { + return obj.HealthData, nil + } else { + cclog.Debug("resolver: no HealthData (NodeState) on node object") + emptyHealth := make(map[string][]string, 0) + return emptyHealth, nil + } } // Clusters is the resolver for the clusters field. @@ -398,6 +419,15 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o return &model.NodeStateResultList{Items: nodes, Count: &count}, err } +// NodesWithMeta is the resolver for the nodesWithMeta field. +func (r *queryResolver) NodesWithMeta(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) { + // Why Extra Handler? -> graphql.CollectAllFields(ctx) only returns toplevel fields (i.e.: items, count), and not subfields like item.metaData + repo := repository.GetNodeRepository() + nodes, err := repo.QueryNodesWithMeta(ctx, filter, nil, order) // Ignore Paging, Order Unused + count := len(nodes) + return &model.NodeStateResultList{Items: nodes, Count: &count}, err +} + // NodeStates is the resolver for the nodeStates field. func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) { repo := repository.GetNodeRepository() diff --git a/internal/importer/handleImport.go b/internal/importer/handleImport.go index 2ac35ea9..68b6db9c 100644 --- a/internal/importer/handleImport.go +++ b/internal/importer/handleImport.go @@ -38,7 +38,7 @@ import ( func HandleImportFlag(flag string) error { r := repository.GetJobRepository() - for _, pair := range strings.Split(flag, ",") { + for pair := range strings.SplitSeq(flag, ",") { files := strings.Split(pair, ":") if len(files) != 2 { return fmt.Errorf("REPOSITORY/INIT > invalid import flag format") @@ -102,7 +102,7 @@ func HandleImportFlag(flag string) error { return err } - id, err := r.InsertJob(&job) + id, err := r.InsertJobDirect(&job) if err != nil { cclog.Warn("Error while job db insert") return err diff --git a/internal/importer/importer_test.go b/internal/importer/importer_test.go index f53e3a9d..cb4dca89 100644 --- a/internal/importer/importer_test.go +++ b/internal/importer/importer_test.go @@ -165,7 +165,7 @@ func TestHandleImportFlag(t *testing.T) { } result := readResult(t, testname) - job, err := r.FindCached(&result.JobId, &result.Cluster, &result.StartTime) + job, err := r.Find(&result.JobId, &result.Cluster, &result.StartTime) if err != nil { t.Fatal(err) } diff --git a/internal/metricdispatch/metricdata.go b/internal/metricdispatch/metricdata.go index 9626ac86..36a10004 100755 --- a/internal/metricdispatch/metricdata.go +++ b/internal/metricdispatch/metricdata.go @@ -52,6 +52,11 @@ type MetricDataRepository interface { resolution int, from, to time.Time, ctx context.Context) (map[string]schema.JobData, error) + + // HealthCheck evaluates the monitoring state for a set of nodes against expected metrics. + HealthCheck(cluster string, + nodes []string, + metrics []string) (map[string]metricstore.HealthCheckResult, error) } type CCMetricStoreConfig struct { @@ -110,3 +115,9 @@ func GetMetricDataRepo(cluster string, subcluster string) (MetricDataRepository, return repo, nil } + +// GetHealthCheckRepo returns the MetricDataRepository for performing health checks on a cluster. +// It uses the same fallback logic as GetMetricDataRepo: cluster → wildcard → internal. +func GetHealthCheckRepo(cluster string) (MetricDataRepository, error) { + return GetMetricDataRepo(cluster, "") +} diff --git a/internal/metricstoreclient/cc-metric-store.go b/internal/metricstoreclient/cc-metric-store.go index 4472b825..81add789 100644 --- a/internal/metricstoreclient/cc-metric-store.go +++ b/internal/metricstoreclient/cc-metric-store.go @@ -63,6 +63,7 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/pkg/archive" + "github.com/ClusterCockpit/cc-backend/pkg/metricstore" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/ClusterCockpit/cc-lib/v2/schema" ) @@ -653,6 +654,54 @@ func (ccms *CCMetricStore) LoadNodeListData( return data, nil } +// HealthCheck queries the external cc-metric-store's health check endpoint. +// It sends a HealthCheckReq as the request body to /api/healthcheck and +// returns the per-node health check results. +func (ccms *CCMetricStore) HealthCheck(cluster string, + nodes []string, metrics []string, +) (map[string]metricstore.HealthCheckResult, error) { + req := metricstore.HealthCheckReq{ + Cluster: cluster, + Nodes: nodes, + MetricNames: metrics, + } + + buf := &bytes.Buffer{} + if err := json.NewEncoder(buf).Encode(req); err != nil { + cclog.Errorf("Error while encoding health check request body: %s", err.Error()) + return nil, err + } + + endpoint := fmt.Sprintf("%s/api/healthcheck", ccms.url) + httpReq, err := http.NewRequest(http.MethodGet, endpoint, buf) + if err != nil { + cclog.Errorf("Error while building health check request: %s", err.Error()) + return nil, err + } + if ccms.jwt != "" { + httpReq.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt)) + } + + res, err := ccms.client.Do(httpReq) + if err != nil { + cclog.Errorf("Error while performing health check request: %s", err.Error()) + return nil, err + } + defer res.Body.Close() + + if res.StatusCode != http.StatusOK { + return nil, fmt.Errorf("'%s': HTTP Status: %s", endpoint, res.Status) + } + + var results map[string]metricstore.HealthCheckResult + if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&results); err != nil { + cclog.Errorf("Error while decoding health check response: %s", err.Error()) + return nil, err + } + + return results, nil +} + // sanitizeStats replaces NaN values in statistics with 0 to enable JSON marshaling. // Regular float64 values cannot be JSONed when NaN. func sanitizeStats(avg, min, max *schema.Float) { diff --git a/internal/repository/job.go b/internal/repository/job.go index 6b0b2b12..a1cd9719 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -844,6 +844,8 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64 query = query.Where("NOT EXISTS (SELECT 1 FROM jobtag WHERE jobtag.job_id = job.id)") } + query = query.OrderBy("job.cluster ASC", "job.subcluster ASC", "job.project ASC", "job.start_time ASC") + rows, err := query.RunWith(r.stmtCache).Query() if err != nil { cclog.Errorf("Error while running FindJobsBetween query: %v", err) diff --git a/internal/repository/jobCreate.go b/internal/repository/jobCreate.go index 9f4f366d..07c8ce11 100644 --- a/internal/repository/jobCreate.go +++ b/internal/repository/jobCreate.go @@ -30,6 +30,27 @@ const NamedJobInsert string = `INSERT INTO job ( :shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data );` +// InsertJobDirect inserts a job directly into the job table (not job_cache). +// Use this when the returned ID will be used for operations on the job table +// (e.g., adding tags), or for imported jobs that are already completed. +func (r *JobRepository) InsertJobDirect(job *schema.Job) (int64, error) { + r.Mutex.Lock() + defer r.Mutex.Unlock() + + res, err := r.DB.NamedExec(NamedJobInsert, job) + if err != nil { + cclog.Warn("Error while NamedJobInsert (direct)") + return 0, err + } + id, err := res.LastInsertId() + if err != nil { + cclog.Warn("Error while getting last insert ID (direct)") + return 0, err + } + + return id, nil +} + func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) { r.Mutex.Lock() defer r.Mutex.Unlock() @@ -85,6 +106,22 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) { return nil, err } + // Resolve correct job.id from the job table. The IDs read from job_cache + // are from a different auto-increment sequence and must not be used to + // query the job table. + for _, job := range jobs { + var newID int64 + if err := sq.Select("job.id").From("job"). + Where("job.job_id = ? AND job.cluster = ? AND job.start_time = ?", + job.JobID, job.Cluster, job.StartTime). + RunWith(r.stmtCache).QueryRow().Scan(&newID); err != nil { + cclog.Warnf("SyncJobs: could not resolve job table id for job %d on %s: %v", + job.JobID, job.Cluster, err) + continue + } + job.ID = &newID + } + return jobs, nil } @@ -132,6 +169,28 @@ func (r *JobRepository) Start(job *schema.Job) (id int64, err error) { return r.InsertJob(job) } +// StartDirect inserts a new job directly into the job table (not job_cache). +// Use this when the returned ID will immediately be used for job table +// operations such as adding tags. +func (r *JobRepository) StartDirect(job *schema.Job) (id int64, err error) { + job.RawFootprint, err = json.Marshal(job.Footprint) + if err != nil { + return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err) + } + + job.RawResources, err = json.Marshal(job.Resources) + if err != nil { + return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err) + } + + job.RawMetaData, err = json.Marshal(job.MetaData) + if err != nil { + return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err) + } + + return r.InsertJobDirect(job) +} + // Stop updates the job with the database id jobId using the provided arguments. func (r *JobRepository) Stop( jobID int64, diff --git a/internal/repository/jobCreate_test.go b/internal/repository/jobCreate_test.go index 9e72555f..3f2ee6fa 100644 --- a/internal/repository/jobCreate_test.go +++ b/internal/repository/jobCreate_test.go @@ -489,6 +489,34 @@ func TestSyncJobs(t *testing.T) { require.NoError(t, err) }) + t.Run("sync returns job table IDs not cache IDs", func(t *testing.T) { + // Ensure cache is empty first + _, err := r.DB.Exec("DELETE FROM job_cache") + require.NoError(t, err) + + // Insert a job into job_cache + job := createTestJob(999015, "testcluster") + cacheID, err := r.Start(job) + require.NoError(t, err) + + // Sync jobs + jobs, err := r.SyncJobs() + require.NoError(t, err) + require.Equal(t, 1, len(jobs)) + + // The returned ID must refer to the job table, not job_cache + var jobTableID int64 + err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?", + jobs[0].JobID, jobs[0].Cluster, jobs[0].StartTime).Scan(&jobTableID) + require.NoError(t, err) + assert.Equal(t, jobTableID, *jobs[0].ID, + "returned ID should match the job table row, not the cache ID (%d)", cacheID) + + // Clean up + _, err = r.DB.Exec("DELETE FROM job WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster) + require.NoError(t, err) + }) + t.Run("sync with empty cache returns empty list", func(t *testing.T) { // Ensure cache is empty _, err := r.DB.Exec("DELETE FROM job_cache") @@ -500,3 +528,80 @@ func TestSyncJobs(t *testing.T) { assert.Equal(t, 0, len(jobs), "Should return empty list when cache is empty") }) } + +func TestInsertJobDirect(t *testing.T) { + r := setup(t) + + t.Run("inserts into job table not cache", func(t *testing.T) { + job := createTestJob(999020, "testcluster") + job.RawResources, _ = json.Marshal(job.Resources) + job.RawFootprint, _ = json.Marshal(job.Footprint) + job.RawMetaData, _ = json.Marshal(job.MetaData) + + id, err := r.InsertJobDirect(job) + require.NoError(t, err, "InsertJobDirect should succeed") + assert.Greater(t, id, int64(0), "Should return valid insert ID") + + // Verify job is in job table + var count int + err = r.DB.QueryRow("SELECT COUNT(*) FROM job WHERE id = ?", id).Scan(&count) + require.NoError(t, err) + assert.Equal(t, 1, count, "Job should be in job table") + + // Verify job is NOT in job_cache + err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id = ? AND cluster = ?", + job.JobID, job.Cluster).Scan(&count) + require.NoError(t, err) + assert.Equal(t, 0, count, "Job should NOT be in job_cache") + + // Clean up + _, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id) + require.NoError(t, err) + }) + + t.Run("returned ID works for tag operations", func(t *testing.T) { + job := createTestJob(999021, "testcluster") + job.RawResources, _ = json.Marshal(job.Resources) + job.RawFootprint, _ = json.Marshal(job.Footprint) + job.RawMetaData, _ = json.Marshal(job.MetaData) + + id, err := r.InsertJobDirect(job) + require.NoError(t, err) + + // Adding a tag using the returned ID should succeed (FK constraint on jobtag) + err = r.ImportTag(id, "test_type", "test_name", "global") + require.NoError(t, err, "ImportTag should succeed with direct insert ID") + + // Clean up + _, err = r.DB.Exec("DELETE FROM jobtag WHERE job_id = ?", id) + require.NoError(t, err) + _, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id) + require.NoError(t, err) + }) +} + +func TestStartDirect(t *testing.T) { + r := setup(t) + + t.Run("inserts into job table with JSON encoding", func(t *testing.T) { + job := createTestJob(999022, "testcluster") + + id, err := r.StartDirect(job) + require.NoError(t, err, "StartDirect should succeed") + assert.Greater(t, id, int64(0)) + + // Verify job is in job table with encoded JSON + var rawResources []byte + err = r.DB.QueryRow("SELECT resources FROM job WHERE id = ?", id).Scan(&rawResources) + require.NoError(t, err) + + var resources []*schema.Resource + err = json.Unmarshal(rawResources, &resources) + require.NoError(t, err, "Resources should be valid JSON") + assert.Equal(t, "node01", resources[0].Hostname) + + // Clean up + _, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id) + require.NoError(t, err) + }) +} diff --git a/internal/repository/node.go b/internal/repository/node.go index 2ffe6698..09415bef 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -154,16 +154,14 @@ func (r *NodeRepository) GetNodeByID(id int64, withMeta bool) (*schema.Node, err return nil, err } - // NEEDS METADATA BY ID - // if withMeta { - // var err error - // var meta map[string]string - // if meta, err = r.FetchMetadata(hostname, cluster); err != nil { - // cclog.Warnf("Error while fetching metadata for node '%s'", hostname) - // return nil, err - // } - // node.MetaData = meta - // } + if withMeta { + meta, metaErr := r.FetchMetadata(node.Hostname, node.Cluster) + if metaErr != nil { + cclog.Warnf("Error while fetching metadata for node ID '%d': %v", id, metaErr) + return nil, metaErr + } + node.MetaData = meta + } return node, nil } @@ -285,7 +283,7 @@ func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode Join("node ON node_state.node_id = node.id"). Where(sq.Lt{"node_state.time_stamp": cutoff}). Where("node_state.id NOT IN (SELECT ns2.id FROM node_state ns2 WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id))"). - OrderBy("node_state.time_stamp ASC"). + OrderBy("node.cluster ASC", "node.subcluster ASC", "node.hostname ASC", "node_state.time_stamp ASC"). RunWith(r.DB).Query() if err != nil { return nil, err @@ -295,13 +293,15 @@ func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode var result []NodeStateWithNode for rows.Next() { var ns NodeStateWithNode + var healthMetrics sql.NullString if err := rows.Scan(&ns.ID, &ns.TimeStamp, &ns.NodeState, - &ns.HealthState, &ns.HealthMetrics, + &ns.HealthState, &healthMetrics, &ns.CpusAllocated, &ns.MemoryAllocated, &ns.GpusAllocated, &ns.JobsRunning, &ns.Hostname, &ns.Cluster, &ns.SubCluster); err != nil { return nil, err } + ns.HealthMetrics = healthMetrics.String result = append(result, ns) } return result, nil @@ -382,6 +382,81 @@ func (r *NodeRepository) QueryNodes( return nodes, nil } +// QueryNodesWithMeta returns a list of nodes based on a node filter. It always operates +// on the last state (largest timestamp). It includes both (!) optional JSON column data +func (r *NodeRepository) QueryNodesWithMeta( + ctx context.Context, + filters []*model.NodeFilter, + page *model.PageRequest, + order *model.OrderByInput, // Currently unused! +) ([]*schema.Node, error) { + query, qerr := AccessCheck(ctx, + sq.Select("node.hostname", "node.cluster", "node.subcluster", + "node_state.node_state", "node_state.health_state", + "node.meta_data", "node_state.health_metrics"). + From("node"). + Join("node_state ON node_state.node_id = node.id"). + Where(latestStateCondition())) + if qerr != nil { + return nil, qerr + } + + query = applyNodeFilters(query, filters) + query = query.OrderBy("node.hostname ASC") + + if page != nil && page.ItemsPerPage != -1 { + limit := uint64(page.ItemsPerPage) + query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit) + } + + rows, err := query.RunWith(r.stmtCache).Query() + if err != nil { + queryString, queryVars, _ := query.ToSql() + cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err) + return nil, err + } + + nodes := make([]*schema.Node, 0) + for rows.Next() { + node := schema.Node{} + RawMetaData := make([]byte, 0) + RawMetricHealth := make([]byte, 0) + + if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster, + &node.NodeState, &node.HealthState, &RawMetaData, &RawMetricHealth); err != nil { + rows.Close() + cclog.Warn("Error while scanning rows (QueryNodes)") + return nil, err + } + + if len(RawMetaData) == 0 { + node.MetaData = nil + } else { + metaData := make(map[string]string) + if err := json.Unmarshal(RawMetaData, &metaData); err != nil { + cclog.Warn("Error while unmarshaling raw metadata json") + return nil, err + } + node.MetaData = metaData + } + + if len(RawMetricHealth) == 0 { + node.HealthData = nil + } else { + healthData := make(map[string][]string) + if err := json.Unmarshal(RawMetricHealth, &healthData); err != nil { + cclog.Warn("Error while unmarshaling raw healthdata json") + return nil, err + } + node.HealthData = healthData + } + + nodes = append(nodes, &node) + } + + return nodes, nil +} + // CountNodes returns the total matched nodes based on a node filter. It always operates // on the last state (largest timestamp) per node. func (r *NodeRepository) CountNodes( diff --git a/internal/repository/transaction.go b/internal/repository/transaction.go index 9074428f..3ae0562d 100644 --- a/internal/repository/transaction.go +++ b/internal/repository/transaction.go @@ -62,7 +62,7 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error { func (r *JobRepository) TransactionAddNamed( t *Transaction, query string, - args ...interface{}, + args ...any, ) (int64, error) { if t.tx == nil { return 0, fmt.Errorf("transaction is nil or already completed") @@ -82,7 +82,7 @@ func (r *JobRepository) TransactionAddNamed( } // TransactionAdd executes a query within the transaction. -func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) { +func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...any) (int64, error) { if t.tx == nil { return 0, fmt.Errorf("transaction is nil or already completed") } diff --git a/internal/repository/transaction_test.go b/internal/repository/transaction_test.go index 1832bea0..777a2a45 100644 --- a/internal/repository/transaction_test.go +++ b/internal/repository/transaction_test.go @@ -189,7 +189,7 @@ func TestTransactionAddNamed(t *testing.T) { tx := &Transaction{tx: nil} _, err := r.TransactionAddNamed(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (:type, :name, :scope)", - map[string]interface{}{"type": "test", "name": "test", "scope": "global"}) + map[string]any{"type": "test", "name": "test", "scope": "global"}) assert.Error(t, err, "Should error on nil transaction") assert.Contains(t, err.Error(), "transaction is nil or already completed") }) @@ -204,7 +204,7 @@ func TestTransactionMultipleOperations(t *testing.T) { defer tx.Rollback() // Insert multiple tags - for i := 0; i < 5; i++ { + for i := range 5 { _, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)", "test_type", "test_multi_"+string(rune('a'+i)), "global") @@ -230,7 +230,7 @@ func TestTransactionMultipleOperations(t *testing.T) { require.NoError(t, err) // Insert multiple tags - for i := 0; i < 3; i++ { + for i := range 3 { _, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)", "test_type", "test_rollback_"+string(rune('a'+i)), "global") diff --git a/internal/repository/user.go b/internal/repository/user.go index 42a22384..966646dd 100644 --- a/internal/repository/user.go +++ b/internal/repository/user.go @@ -126,7 +126,7 @@ func (r *UserRepository) AddUser(user *schema.User) error { projectsJson, _ := json.Marshal(user.Projects) cols := []string{"username", "roles", "projects"} - vals := []interface{}{user.Username, string(rolesJson), string(projectsJson)} + vals := []any{user.Username, string(rolesJson), string(projectsJson)} if user.Name != "" { cols = append(cols, "name") @@ -392,7 +392,7 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro } if exists { - var result interface{} + var result any if len(newprojects) == 0 { result = "[]" } else { diff --git a/internal/routerConfig/routes.go b/internal/routerConfig/routes.go index e3978ddc..b6137ddb 100644 --- a/internal/routerConfig/routes.go +++ b/internal/routerConfig/routes.go @@ -23,7 +23,7 @@ import ( "github.com/go-chi/chi/v5" ) -type InfoType map[string]interface{} +type InfoType map[string]any type Route struct { Route string @@ -50,6 +50,7 @@ var routes []Route = []Route{ {"/monitoring/status/{cluster}", "monitoring/status.tmpl", " Dashboard - ClusterCockpit", false, setupClusterStatusRoute}, {"/monitoring/status/detail/{cluster}", "monitoring/status.tmpl", "Status of - ClusterCockpit", false, setupClusterDetailRoute}, {"/monitoring/dashboard/{cluster}", "monitoring/dashboard.tmpl", " Dashboard - ClusterCockpit", false, setupDashboardRoute}, + {"/monitoring/logs", "monitoring/logs.tmpl", "Logs - ClusterCockpit", false, func(i InfoType, r *http.Request) InfoType { return i }}, } func setupHomeRoute(i InfoType, r *http.Request) InfoType { @@ -192,7 +193,7 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType { func setupTaglistRoute(i InfoType, r *http.Request) InfoType { jobRepo := repository.GetJobRepository() tags, counts, err := jobRepo.CountTags(repository.GetUserFromContext(r.Context())) - tagMap := make(map[string][]map[string]interface{}) + tagMap := make(map[string][]map[string]any) if err != nil { cclog.Warnf("GetTags failed: %s", err.Error()) i["tagmap"] = tagMap @@ -203,7 +204,7 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType { // Uses tag.ID as second Map-Key component to differentiate tags with identical names if userAuthlevel >= 4 { // Support+ : Show tags for all scopes, regardless of count for _, tag := range tags { - tagItem := map[string]interface{}{ + tagItem := map[string]any{ "id": tag.ID, "name": tag.Name, "scope": tag.Scope, @@ -215,7 +216,7 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType { for _, tag := range tags { tagCount := counts[fmt.Sprint(tag.Type, tag.Name, tag.ID)] if ((tag.Scope == "global" || tag.Scope == "admin") && tagCount >= 1) || (tag.Scope != "global" && tag.Scope != "admin") { - tagItem := map[string]interface{}{ + tagItem := map[string]any{ "id": tag.ID, "name": tag.Name, "scope": tag.Scope, @@ -231,8 +232,8 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType { } // FIXME: Lots of redundant code. Needs refactoring -func buildFilterPresets(query url.Values) map[string]interface{} { - filterPresets := map[string]interface{}{} +func buildFilterPresets(query url.Values) map[string]any { + filterPresets := map[string]any{} if query.Get("cluster") != "" { filterPresets["cluster"] = query.Get("cluster") @@ -376,14 +377,14 @@ func buildFilterPresets(query url.Values) map[string]interface{} { } } if len(query["stat"]) != 0 { - statList := make([]map[string]interface{}, 0) + statList := make([]map[string]any, 0) for _, statEntry := range query["stat"] { parts := strings.Split(statEntry, "-") if len(parts) == 3 { // Metric Footprint Stat Field, from - to a, e1 := strconv.ParseInt(parts[1], 10, 64) b, e2 := strconv.ParseInt(parts[2], 10, 64) if e1 == nil && e2 == nil { - statEntry := map[string]interface{}{ + statEntry := map[string]any{ "field": parts[0], "from": a, "to": b, @@ -400,7 +401,6 @@ func buildFilterPresets(query url.Values) map[string]interface{} { func SetupRoutes(router chi.Router, buildInfo web.Build) { userCfgRepo := repository.GetUserCfgRepo() for _, route := range routes { - route := route router.HandleFunc(route.Route, func(rw http.ResponseWriter, r *http.Request) { conf, err := userCfgRepo.GetUIConfig(repository.GetUserFromContext(r.Context())) if err != nil { @@ -409,7 +409,7 @@ func SetupRoutes(router chi.Router, buildInfo web.Build) { } title := route.Title - infos := route.Setup(map[string]interface{}{}, r) + infos := route.Setup(map[string]any{}, r) if id, ok := infos["id"]; ok { title = strings.Replace(route.Title, "", id.(string), 1) if sid, ok := infos["sid"]; ok { // 2nd ID element diff --git a/internal/tagger/detectApp.go b/internal/tagger/detectApp.go index 5519cbf0..c82c87bc 100644 --- a/internal/tagger/detectApp.go +++ b/internal/tagger/detectApp.go @@ -19,6 +19,14 @@ import ( "github.com/ClusterCockpit/cc-lib/v2/util" ) +func metadataKeys(m map[string]string) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return keys +} + const ( // defaultConfigPath is the default path for application tagging configuration defaultConfigPath = "./var/tagger/apps" @@ -52,7 +60,10 @@ func (t *AppTagger) scanApp(f *os.File, fns string) { ai := appInfo{tag: tag, patterns: make([]*regexp.Regexp, 0)} for scanner.Scan() { - line := scanner.Text() + line := strings.TrimSpace(scanner.Text()) + if line == "" { + continue + } re, err := regexp.Compile(line) if err != nil { cclog.Errorf("invalid regex pattern '%s' in %s: %v", line, fns, err) @@ -68,6 +79,8 @@ func (t *AppTagger) scanApp(f *os.File, fns string) { break } } + + cclog.Infof("AppTagger loaded %d patterns for %s", len(ai.patterns), tag) t.apps = append(t.apps, ai) } @@ -86,6 +99,9 @@ func (t *AppTagger) EventCallback() { } for _, fn := range files { + if fn.IsDir() { + continue + } fns := fn.Name() cclog.Debugf("Process: %s", fns) f, err := os.Open(filepath.Join(t.cfgPath, fns)) @@ -121,6 +137,9 @@ func (t *AppTagger) Register() error { } for _, fn := range files { + if fn.IsDir() { + continue + } fns := fn.Name() cclog.Debugf("Process: %s", fns) f, err := os.Open(filepath.Join(t.cfgPath, fns)) @@ -147,29 +166,54 @@ func (t *AppTagger) Register() error { // Only the first matching application is tagged. func (t *AppTagger) Match(job *schema.Job) { r := repository.GetJobRepository() + + if len(t.apps) == 0 { + cclog.Warn("AppTagger: no app patterns loaded, skipping match") + return + } + metadata, err := r.FetchMetadata(job) if err != nil { - cclog.Infof("Cannot fetch metadata for job: %d on %s", job.JobID, job.Cluster) + cclog.Infof("AppTagger: cannot fetch metadata for job %d on %s: %v", job.JobID, job.Cluster, err) + return + } + + if metadata == nil { + cclog.Infof("AppTagger: metadata is nil for job %d on %s", job.JobID, job.Cluster) return } jobscript, ok := metadata["jobScript"] - if ok { - id := *job.ID - jobscriptLower := strings.ToLower(jobscript) + if !ok { + cclog.Infof("AppTagger: no 'jobScript' key in metadata for job %d on %s (keys: %v)", + job.JobID, job.Cluster, metadataKeys(metadata)) + return + } - out: - for _, a := range t.apps { - for _, re := range a.patterns { - if re.MatchString(jobscriptLower) { - if !r.HasTag(id, t.tagType, a.tag) { - r.AddTagOrCreateDirect(id, t.tagType, a.tag) - break out + if len(jobscript) == 0 { + cclog.Infof("AppTagger: empty jobScript for job %d on %s", job.JobID, job.Cluster) + return + } + + id := *job.ID + jobscriptLower := strings.ToLower(jobscript) + cclog.Debugf("AppTagger: matching job %d (script length: %d) against %d apps", id, len(jobscriptLower), len(t.apps)) + + for _, a := range t.apps { + for _, re := range a.patterns { + if re.MatchString(jobscriptLower) { + if r.HasTag(id, t.tagType, a.tag) { + cclog.Debugf("AppTagger: job %d already has tag %s:%s, skipping", id, t.tagType, a.tag) + } else { + cclog.Infof("AppTagger: pattern '%s' matched for app '%s' on job %d", re.String(), a.tag, id) + if _, err := r.AddTagOrCreateDirect(id, t.tagType, a.tag); err != nil { + cclog.Errorf("AppTagger: failed to add tag '%s' to job %d: %v", a.tag, id, err) } } + return } } - } else { - cclog.Infof("Cannot extract job script for job: %d on %s", job.JobID, job.Cluster) } + + cclog.Debugf("AppTagger: no pattern matched for job %d on %s", id, job.Cluster) } diff --git a/internal/tagger/tagger.go b/internal/tagger/tagger.go index 067f16a9..bde3817d 100644 --- a/internal/tagger/tagger.go +++ b/internal/tagger/tagger.go @@ -51,10 +51,14 @@ func newTagger() { jobTagger.stopTaggers = append(jobTagger.stopTaggers, &JobClassTagger{}) for _, tagger := range jobTagger.startTaggers { - tagger.Register() + if err := tagger.Register(); err != nil { + cclog.Errorf("failed to register start tagger: %s", err) + } } for _, tagger := range jobTagger.stopTaggers { - tagger.Register() + if err := tagger.Register(); err != nil { + cclog.Errorf("failed to register stop tagger: %s", err) + } } } diff --git a/internal/taskmanager/nodestateRetentionService.go b/internal/taskmanager/nodestateRetentionService.go index 9a704502..b6306849 100644 --- a/internal/taskmanager/nodestateRetentionService.go +++ b/internal/taskmanager/nodestateRetentionService.go @@ -18,7 +18,7 @@ import ( func RegisterNodeStateRetentionDeleteService(ageHours int) { cclog.Info("Register node state retention delete service") - s.NewJob(gocron.DurationJob(1*time.Hour), + s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(2, 0, 0))), gocron.NewTask( func() { cutoff := time.Now().Unix() - int64(ageHours*3600) @@ -32,8 +32,8 @@ func RegisterNodeStateRetentionDeleteService(ageHours int) { })) } -func RegisterNodeStateRetentionParquetService(cfg *config.NodeStateRetention) { - cclog.Info("Register node state retention parquet service") +func RegisterNodeStateRetentionMoveService(cfg *config.NodeStateRetention) { + cclog.Info("Register node state retention move service") maxFileSizeMB := cfg.MaxFileSizeMB if maxFileSizeMB <= 0 { @@ -63,11 +63,11 @@ func RegisterNodeStateRetentionParquetService(cfg *config.NodeStateRetention) { } if err != nil { - cclog.Errorf("NodeState parquet retention: failed to create target: %v", err) + cclog.Errorf("NodeState move retention: failed to create target: %v", err) return } - s.NewJob(gocron.DurationJob(1*time.Hour), + s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(2, 30, 0))), gocron.NewTask( func() { cutoff := time.Now().Unix() - int64(ageHours*3600) @@ -75,14 +75,14 @@ func RegisterNodeStateRetentionParquetService(cfg *config.NodeStateRetention) { rows, err := nodeRepo.FindNodeStatesBefore(cutoff) if err != nil { - cclog.Errorf("NodeState parquet retention: error finding rows: %v", err) + cclog.Errorf("NodeState move retention: error finding rows: %v", err) return } if len(rows) == 0 { return } - cclog.Infof("NodeState parquet retention: archiving %d rows", len(rows)) + cclog.Infof("NodeState move retention: archiving %d rows", len(rows)) pw := pqarchive.NewNodeStateParquetWriter(target, maxFileSizeMB) for _, ns := range rows { @@ -100,21 +100,21 @@ func RegisterNodeStateRetentionParquetService(cfg *config.NodeStateRetention) { SubCluster: ns.SubCluster, } if err := pw.AddRow(row); err != nil { - cclog.Errorf("NodeState parquet retention: add row: %v", err) + cclog.Errorf("NodeState move retention: add row: %v", err) continue } } if err := pw.Close(); err != nil { - cclog.Errorf("NodeState parquet retention: close writer: %v", err) + cclog.Errorf("NodeState move retention: close writer: %v", err) return } cnt, err := nodeRepo.DeleteNodeStatesBefore(cutoff) if err != nil { - cclog.Errorf("NodeState parquet retention: error deleting rows: %v", err) + cclog.Errorf("NodeState move retention: error deleting rows: %v", err) } else { - cclog.Infof("NodeState parquet retention: deleted %d rows from db", cnt) + cclog.Infof("NodeState move retention: deleted %d rows from db", cnt) } })) } diff --git a/internal/taskmanager/retentionService.go b/internal/taskmanager/retentionService.go index d863bb91..eda452e6 100644 --- a/internal/taskmanager/retentionService.go +++ b/internal/taskmanager/retentionService.go @@ -45,13 +45,13 @@ func createTargetBackend(cfg Retention) (archive.ArchiveBackend, error) { switch cfg.TargetKind { case "s3": - raw, err = json.Marshal(map[string]interface{}{ - "kind": "s3", - "endpoint": cfg.TargetEndpoint, - "bucket": cfg.TargetBucket, - "access-key": cfg.TargetAccessKey, - "secret-key": cfg.TargetSecretKey, - "region": cfg.TargetRegion, + raw, err = json.Marshal(map[string]any{ + "kind": "s3", + "endpoint": cfg.TargetEndpoint, + "bucket": cfg.TargetBucket, + "access-key": cfg.TargetAccessKey, + "secret-key": cfg.TargetSecretKey, + "region": cfg.TargetRegion, "use-path-style": cfg.TargetUsePathStyle, }) default: diff --git a/internal/taskmanager/taskManager.go b/internal/taskmanager/taskManager.go index 529395b5..b25b2a93 100644 --- a/internal/taskmanager/taskManager.go +++ b/internal/taskmanager/taskManager.go @@ -154,8 +154,8 @@ func initNodeStateRetention() { switch cfg.Policy { case "delete": RegisterNodeStateRetentionDeleteService(age) - case "parquet": - RegisterNodeStateRetentionParquetService(cfg) + case "move": + RegisterNodeStateRetentionMoveService(cfg) default: cclog.Warnf("Unknown nodestate-retention policy: %s", cfg.Policy) } diff --git a/pkg/archive/fsBackend.go b/pkg/archive/fsBackend.go index 61921d70..07b86e2b 100644 --- a/pkg/archive/fsBackend.go +++ b/pkg/archive/fsBackend.go @@ -16,6 +16,7 @@ import ( "os" "path" "path/filepath" + "slices" "strconv" "strings" "sync" @@ -692,13 +693,7 @@ func (fsa *FsArchive) StoreClusterCfg(name string, config *schema.Cluster) error } // Update clusters list if new - found := false - for _, c := range fsa.clusters { - if c == name { - found = true - break - } - } + found := slices.Contains(fsa.clusters, name) if !found { fsa.clusters = append(fsa.clusters, name) } diff --git a/pkg/archive/parquet/nodestate_writer.go b/pkg/archive/parquet/nodestate_writer.go index 053417d6..074e02e4 100644 --- a/pkg/archive/parquet/nodestate_writer.go +++ b/pkg/archive/parquet/nodestate_writer.go @@ -83,7 +83,13 @@ func writeNodeStateParquetBytes(rows []ParquetNodeStateRow) ([]byte, error) { var buf bytes.Buffer writer := pq.NewGenericWriter[ParquetNodeStateRow](&buf, - pq.Compression(&pq.Snappy), + pq.Compression(&pq.Zstd), + pq.SortingWriterConfig(pq.SortingColumns( + pq.Ascending("cluster"), + pq.Ascending("subcluster"), + pq.Ascending("hostname"), + pq.Ascending("time_stamp"), + )), ) if _, err := writer.Write(rows); err != nil { diff --git a/pkg/archive/parquet/writer.go b/pkg/archive/parquet/writer.go index 2669a9c8..bfe4490f 100644 --- a/pkg/archive/parquet/writer.go +++ b/pkg/archive/parquet/writer.go @@ -87,7 +87,12 @@ func writeParquetBytes(rows []ParquetJobRow) ([]byte, error) { var buf bytes.Buffer writer := pq.NewGenericWriter[ParquetJobRow](&buf, - pq.Compression(&pq.Snappy), + pq.Compression(&pq.Zstd), + pq.SortingWriterConfig(pq.SortingColumns( + pq.Ascending("sub_cluster"), + pq.Ascending("project"), + pq.Ascending("start_time"), + )), ) if _, err := writer.Write(rows); err != nil { diff --git a/pkg/archive/parquet/writer_test.go b/pkg/archive/parquet/writer_test.go index 57b4ca4c..9515edc3 100644 --- a/pkg/archive/parquet/writer_test.go +++ b/pkg/archive/parquet/writer_test.go @@ -39,18 +39,18 @@ func (m *memTarget) WriteFile(name string, data []byte) error { func makeTestJob(jobID int64) (*schema.Job, *schema.JobData) { meta := &schema.Job{ - JobID: jobID, - Cluster: "testcluster", - SubCluster: "sc0", - Project: "testproject", - User: "testuser", - State: schema.JobStateCompleted, - StartTime: 1700000000, - Duration: 3600, - Walltime: 7200, - NumNodes: 2, + JobID: jobID, + Cluster: "testcluster", + SubCluster: "sc0", + Project: "testproject", + User: "testuser", + State: schema.JobStateCompleted, + StartTime: 1700000000, + Duration: 3600, + Walltime: 7200, + NumNodes: 2, NumHWThreads: 16, - SMT: 1, + SMT: 1, Resources: []*schema.Resource{ {Hostname: "node001"}, {Hostname: "node002"}, @@ -141,7 +141,7 @@ func TestParquetWriterSingleBatch(t *testing.T) { target := newMemTarget() pw := NewParquetWriter(target, 512) - for i := int64(0); i < 5; i++ { + for i := range int64(5) { meta, data := makeTestJob(i) row, err := JobToParquetRow(meta, data) if err != nil { @@ -179,7 +179,7 @@ func TestParquetWriterBatching(t *testing.T) { pw := NewParquetWriter(target, 0) // 0 MB means every job triggers a flush pw.maxSizeBytes = 1 // Force flush after every row - for i := int64(0); i < 3; i++ { + for i := range int64(3) { meta, data := makeTestJob(i) row, err := JobToParquetRow(meta, data) if err != nil { @@ -263,7 +263,7 @@ func TestClusterAwareParquetWriter(t *testing.T) { cw.SetClusterConfig("alex", &schema.Cluster{Name: "alex"}) // Add jobs from different clusters - for i := int64(0); i < 3; i++ { + for i := range int64(3) { meta, data := makeTestJobForCluster(i, "fritz") row, err := JobToParquetRow(meta, data) if err != nil { diff --git a/pkg/metricstore/avroHelper.go b/pkg/metricstore/avroHelper.go index 62827afd..f6bef36e 100644 --- a/pkg/metricstore/avroHelper.go +++ b/pkg/metricstore/avroHelper.go @@ -9,6 +9,7 @@ import ( "context" "slices" "strconv" + "strings" "sync" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" @@ -44,11 +45,11 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { continue } - metricName := "" + var metricName strings.Builder for _, selectorName := range val.Selector { - metricName += selectorName + SelectorDelimiter + metricName.WriteString(selectorName + SelectorDelimiter) } - metricName += val.MetricName + metricName.WriteString(val.MetricName) var selector []string selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10)) @@ -62,7 +63,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { } if avroLevel != nil { - avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq)) + avroLevel.addMetric(metricName.String(), val.Value, val.Timestamp, int(freq)) } default: // No more messages, exit @@ -82,13 +83,13 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { continue } - metricName := "" + var metricName strings.Builder for _, selectorName := range val.Selector { - metricName += selectorName + SelectorDelimiter + metricName.WriteString(selectorName + SelectorDelimiter) } - metricName += val.MetricName + metricName.WriteString(val.MetricName) // Create a new selector for the Avro level // The selector is a slice of strings that represents the path to the @@ -109,7 +110,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { } if avroLevel != nil { - avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq)) + avroLevel.addMetric(metricName.String(), val.Value, val.Timestamp, int(freq)) } } } diff --git a/pkg/metricstore/buffer.go b/pkg/metricstore/buffer.go index 46eb5149..665d8012 100644 --- a/pkg/metricstore/buffer.go +++ b/pkg/metricstore/buffer.go @@ -237,9 +237,10 @@ func (b *buffer) free(t int64) (delme bool, n int) { n += m if delme { b.prev.next = nil - if cap(b.prev.data) == BufferCap { - bufferPool.Put(b.prev) + if cap(b.prev.data) != BufferCap { + b.prev.data = make([]schema.Float, 0, BufferCap) } + bufferPool.Put(b.prev) b.prev = nil } } diff --git a/pkg/metricstore/healthcheck.go b/pkg/metricstore/healthcheck.go index d6def692..73973ab0 100644 --- a/pkg/metricstore/healthcheck.go +++ b/pkg/metricstore/healthcheck.go @@ -133,6 +133,12 @@ func (m *MemoryStore) GetHealthyMetrics(selector []string, expectedMetrics []str return degradedList, missingList, nil } +type HealthCheckReq struct { + Cluster string `json:"cluster" example:"fritz"` + Nodes []string `json:"nodes"` + MetricNames []string `json:"metric-names"` +} + // HealthCheck evaluates multiple nodes against a set of expected metrics // and returns a monitoring state per node. // diff --git a/pkg/metricstore/level.go b/pkg/metricstore/level.go index bfa0ddf0..85c2ba7b 100644 --- a/pkg/metricstore/level.go +++ b/pkg/metricstore/level.go @@ -189,9 +189,10 @@ func (l *Level) free(t int64) (int, error) { delme, m := b.free(t) n += m if delme { - if cap(b.data) == BufferCap { - bufferPool.Put(b) + if cap(b.data) != BufferCap { + b.data = make([]schema.Float, 0, BufferCap) } + bufferPool.Put(b) l.metrics[i] = nil } } diff --git a/pkg/metricstore/metricstore_test.go b/pkg/metricstore/metricstore_test.go index a9ff0055..eb1aff15 100644 --- a/pkg/metricstore/metricstore_test.go +++ b/pkg/metricstore/metricstore_test.go @@ -143,7 +143,7 @@ func TestHealthCheck(t *testing.T) { // Setup test data for node003 - some metrics missing (no buffer) node003 := ms.root.findLevelOrCreate([]string{"testcluster", "node003"}, len(metrics)) // Only create buffers for first two metrics - for i := 0; i < 2; i++ { + for i := range 2 { node003.metrics[i] = newBuffer(startTime, 10) for ts := startTime; ts <= now; ts += 10 { node003.metrics[i].write(ts, schema.Float(float64(i+1))) diff --git a/pkg/metricstore/query.go b/pkg/metricstore/query.go index 709a9710..7dce5dcd 100644 --- a/pkg/metricstore/query.go +++ b/pkg/metricstore/query.go @@ -42,6 +42,13 @@ type InternalMetricStore struct{} var MetricStoreHandle *InternalMetricStore +// HealthCheck delegates to the internal MemoryStore's HealthCheck. +func (ccms *InternalMetricStore) HealthCheck(cluster string, + nodes []string, metrics []string, +) (map[string]HealthCheckResult, error) { + return GetMemoryStore().HealthCheck(cluster, nodes, metrics) +} + // TestLoadDataCallback allows tests to override LoadData behavior for testing purposes. // When set to a non-nil function, LoadData will call this function instead of the default implementation. var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) diff --git a/web/frontend/package-lock.json b/web/frontend/package-lock.json index e3451242..6962dc1b 100644 --- a/web/frontend/package-lock.json +++ b/web/frontend/package-lock.json @@ -250,7 +250,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -264,7 +263,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -278,7 +276,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -292,7 +289,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -306,7 +302,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -320,7 +315,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -334,7 +328,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -348,7 +341,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -362,7 +354,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -376,7 +367,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -390,7 +380,6 @@ "cpu": [ "loong64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -404,7 +393,6 @@ "cpu": [ "loong64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -418,7 +406,6 @@ "cpu": [ "ppc64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -432,7 +419,6 @@ "cpu": [ "ppc64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -446,7 +432,6 @@ "cpu": [ "riscv64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -460,7 +445,6 @@ "cpu": [ "riscv64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -474,7 +458,6 @@ "cpu": [ "s390x" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -488,7 +471,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -502,7 +484,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -516,7 +497,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -530,7 +510,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -544,7 +523,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -558,7 +536,6 @@ "cpu": [ "ia32" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -572,7 +549,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -586,7 +562,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -837,7 +812,6 @@ "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, "hasInstallScript": true, "license": "MIT", "optional": true, diff --git a/web/frontend/rollup.config.mjs b/web/frontend/rollup.config.mjs index 6b7cf884..8aca6161 100644 --- a/web/frontend/rollup.config.mjs +++ b/web/frontend/rollup.config.mjs @@ -75,5 +75,6 @@ export default [ entrypoint('analysis', 'src/analysis.entrypoint.js'), entrypoint('status', 'src/status.entrypoint.js'), entrypoint('dashpublic', 'src/dashpublic.entrypoint.js'), - entrypoint('config', 'src/config.entrypoint.js') + entrypoint('config', 'src/config.entrypoint.js'), + entrypoint('logs', 'src/logs.entrypoint.js') ]; diff --git a/web/frontend/src/Header.svelte b/web/frontend/src/Header.svelte index c173a9f4..862981fd 100644 --- a/web/frontend/src/Header.svelte +++ b/web/frontend/src/Header.svelte @@ -135,6 +135,16 @@ listOptions: true, menu: "Info", }, + { + title: "Logs", + // svelte-ignore state_referenced_locally + requiredRole: roles.admin, + href: "/monitoring/logs", + icon: "journal-text", + perCluster: false, + listOptions: false, + menu: "Info", + }, ]; /* State Init */ diff --git a/web/frontend/src/Logs.root.svelte b/web/frontend/src/Logs.root.svelte new file mode 100644 index 00000000..ccadabce --- /dev/null +++ b/web/frontend/src/Logs.root.svelte @@ -0,0 +1,254 @@ + + + + +{#if !isAdmin} + + +

Access denied. Admin privileges required.

+
+
+{:else} + + +
+ + + {#each timeRanges as tr} + + {/each} + + + + + + {#each levels as lv} + + {/each} + + + + + Lines + + + + + + + + + + { + if (e.key === "Enter") fetchLogs(); + }} + /> + + + + + + Auto + + {#each refreshIntervals as ri} + + {/each} + + + + {#if entries.length > 0} + {entries.length} entries + {/if} +
+
+ + {#if error} +
{error}
+ {/if} + +
+ + + + + + + + + + {#each entries as entry} + + + + + + {:else} + {#if !loading && !error} + + {/if} + {/each} + +
TimestampLevelMessage
{formatTimestamp(entry.timestamp)}{levelName(entry.priority)}{entry.message}
No log entries found
+
+
+
+{/if} diff --git a/web/frontend/src/generic/joblist/JobListRow.svelte b/web/frontend/src/generic/joblist/JobListRow.svelte index 9db340d4..3963708f 100644 --- a/web/frontend/src/generic/joblist/JobListRow.svelte +++ b/web/frontend/src/generic/joblist/JobListRow.svelte @@ -20,6 +20,7 @@ import { queryStore, gql, getContextClient } from "@urql/svelte"; import { Card, Spinner } from "@sveltestrap/sveltestrap"; import { maxScope, checkMetricAvailability } from "../utils.js"; + import uPlot from "uplot"; import JobInfo from "./JobInfo.svelte"; import MetricPlot from "../plots/MetricPlot.svelte"; import JobFootprint from "../helper/JobFootprint.svelte"; @@ -74,13 +75,17 @@ } `; + /* Var Init*/ + // svelte-ignore state_referenced_locally + let plotSync = uPlot.sync(`jobMetricStack-${job.cluster}-${job.id}`); + /* State Init */ let zoomStates = $state({}); let thresholdStates = $state({}); /* Derived */ const resampleDefault = $derived(resampleConfig ? Math.max(...resampleConfig.resolutions) : 0); - const jobId = $derived(job?.id); + const jobId = $derived(job.id); const scopes = $derived.by(() => { if (job.numNodes == 1) { if (job.numAcc >= 1) return ["core", "accelerator"]; @@ -233,6 +238,7 @@ numaccs={job.numAcc} zoomState={zoomStates[metric.data.name] || null} thresholdState={thresholdStates[metric.data.name] || null} + {plotSync} /> {:else} diff --git a/web/frontend/src/logs.entrypoint.js b/web/frontend/src/logs.entrypoint.js new file mode 100644 index 00000000..5eb3c0c8 --- /dev/null +++ b/web/frontend/src/logs.entrypoint.js @@ -0,0 +1,10 @@ +import { mount } from 'svelte'; +import {} from './header.entrypoint.js' +import Logs from './Logs.root.svelte' + +mount(Logs, { + target: document.getElementById('svelte-app'), + props: { + isAdmin: isAdmin, + } +}) diff --git a/web/frontend/src/status/DashDetails.svelte b/web/frontend/src/status/DashDetails.svelte index 9b6dda56..b46d0935 100644 --- a/web/frontend/src/status/DashDetails.svelte +++ b/web/frontend/src/status/DashDetails.svelte @@ -23,6 +23,7 @@ } from "@sveltestrap/sveltestrap"; import StatusDash from "./dashdetails/StatusDash.svelte"; + import HealthDash from "./dashdetails/HealthDash.svelte"; import UsageDash from "./dashdetails/UsageDash.svelte"; import StatisticsDash from "./dashdetails/StatisticsDash.svelte"; @@ -65,7 +66,13 @@ - + + + + + + + diff --git a/web/frontend/src/status/dashdetails/HealthDash.svelte b/web/frontend/src/status/dashdetails/HealthDash.svelte new file mode 100644 index 00000000..11f1ef31 --- /dev/null +++ b/web/frontend/src/status/dashdetails/HealthDash.svelte @@ -0,0 +1,396 @@ + + + + + + + + { + querySorting = { field: "startTime", type: "col", order: "DESC" }; + }} + /> + + + +
+ + +{#if $statusQuery.fetching} + + + + + +{:else if $statusQuery.error} + + + Status Query (States): {$statusQuery.error.message} + + +{:else if $statusQuery?.data?.nodeStates} + + +
+ {#key refinedStateData} +

+ Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States +

+ sd.count, + )} + entities={refinedStateData.map( + (sd) => sd.state, + )} + fixColors={refinedStateData.map( + (sd) => colors['nodeStates'][sd.state], + )} + /> + {/key} +
+ + + {#key refinedStateData} + + + + + + + {#each refinedStateData as sd, i} + + + + + + {/each} +
Current StateNodes
{sd.state}{sd.count}
+ {/key} + + + +
+ {#key refinedHealthData} +

+ Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health +

+ hd.count, + )} + entities={refinedHealthData.map( + (hd) => hd.state, + )} + fixColors={refinedHealthData.map( + (hd) => colors['healthStates'][hd.state], + )} + /> + {/key} +
+ + + {#key refinedHealthData} + + + + + + + {#each refinedHealthData as hd, i} + + + + + + {/each} +
Current HealthNodes
{hd.state}{hd.count}
+ {/key} + +
+{/if} + +
+ + +{#if $statusQuery.fetching} + + + + + +{:else if $statusQuery.error} + + + Status Query (Details): {$statusQuery.error.message} + + +{:else if $statusQuery.data} + + + + + + + + + + + + + + + + + + + + + + + + {#each filteredTableData as host (host.hostname)} + + + + + + + + {/each} + +
sortBy('hostname')}> + Hosts ({filteredTableData.length}) + + sortBy('schedulerState')}> + Scheduler State + + sortBy('healthState')}> + Health State + + Metric AvailabilityMeta Information
+ + + + + + + + + + {#each stateOptions as so} + + {/each} + + + + + + + + + {#each healthOptions as ho} + + {/each} + + + + + +
{host.hostname}{host.schedulerState}{host.healthState} + {#each Object.keys(host.healthData) as hkey} +

+ {hkey}: {host.healthData[hkey]} +

+ {/each} +
+ {#each Object.keys(host.metaData) as mkey} +

+ {mkey}: {host.metaData[mkey]} +

+ {/each} +
+
+ +
+{:else} + Cannot render metric health info: No data! +{/if} diff --git a/web/frontend/src/status/dashdetails/StatusDash.svelte b/web/frontend/src/status/dashdetails/StatusDash.svelte index 730ecdcc..8d108964 100644 --- a/web/frontend/src/status/dashdetails/StatusDash.svelte +++ b/web/frontend/src/status/dashdetails/StatusDash.svelte @@ -15,7 +15,6 @@ CardBody, Table, Progress, - Icon, Spinner } from "@sveltestrap/sveltestrap"; import { @@ -27,22 +26,18 @@ import Refresher from "../../generic/helper/Refresher.svelte"; import TimeSelection from "../../generic/select/TimeSelection.svelte"; import Roofline from "../../generic/plots/Roofline.svelte"; - import Pie, { colors } from "../../generic/plots/Pie.svelte"; import Stacked from "../../generic/plots/Stacked.svelte"; /* Svelte 5 Props */ let { clusters, presetCluster, - useCbColors = false, - useAltColors = false, } = $props(); /* Const Init */ const client = getContextClient(); /* State Init */ - let pieWidth = $state(0); let from = $state(new Date(Date.now() - 5 * 60 * 1000)); let to = $state(new Date(Date.now())); let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400); @@ -163,11 +158,6 @@ schedulerState } } - # Get Current States fir Pie Charts - nodeStates(filter: $nodeFilter) { - state - count - } # totalNodes includes multiples if shared jobs jobsStatistics( filter: $jobFilter @@ -196,18 +186,6 @@ requestPolicy: "network-only" })); - const refinedStateData = $derived.by(() => { - return $statusQuery?.data?.nodeStates. - filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)). - sort((a, b) => b.count - a.count) - }); - - const refinedHealthData = $derived.by(() => { - return $statusQuery?.data?.nodeStates. - filter((e) => ['full', 'partial', 'failed'].includes(e.state)). - sort((a, b) => b.count - a.count) - }); - /* Effects */ $effect(() => { if ($statusQuery.data) { @@ -367,19 +345,6 @@ return result } - function legendColors(targetIdx) { - // Reuses first color if targetIdx overflows - let c; - if (useCbColors) { - c = [...colors['colorblind']]; - } else if (useAltColors) { - c = [...colors['alternative']]; - } else { - c = [...colors['default']]; - } - return c[(c.length + targetIdx) % c.length]; - } - @@ -408,7 +373,7 @@
- + {#if $statesTimed.fetching} @@ -460,109 +425,6 @@ {/if} -
- - -{#if $statusQuery.fetching} - - - - - -{:else if $statusQuery.error} - - - Status Query (States): {$statesTimed.error.message} - - -{:else if $statusQuery?.data?.nodeStates} - - -
- {#key refinedStateData} -

- Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States -

- sd.count, - )} - entities={refinedStateData.map( - (sd) => sd.state, - )} - fixColors={refinedStateData.map( - (sd) => colors['nodeStates'][sd.state], - )} - /> - {/key} -
- - - {#key refinedStateData} - - - - - - - {#each refinedStateData as sd, i} - - - - - - {/each} -
Current StateNodes
{sd.state}{sd.count}
- {/key} - - - -
- {#key refinedHealthData} -

- Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health -

- hd.count, - )} - entities={refinedHealthData.map( - (hd) => hd.state, - )} - fixColors={refinedHealthData.map( - (hd) => colors['healthStates'][hd.state], - )} - /> - {/key} -
- - - {#key refinedHealthData} - - - - - - - {#each refinedHealthData as hd, i} - - - - - - {/each} -
Current HealthNodes
{hd.state}{hd.count}
- {/key} - -
-{/if} -
{#if $statusQuery.fetching} diff --git a/web/frontend/src/systems/nodelist/NodeListRow.svelte b/web/frontend/src/systems/nodelist/NodeListRow.svelte index 4689ed21..558d0642 100644 --- a/web/frontend/src/systems/nodelist/NodeListRow.svelte +++ b/web/frontend/src/systems/nodelist/NodeListRow.svelte @@ -211,6 +211,7 @@ timestep={metricData.data.metric.timestep} series={metricData.data.metric.series} height={375} + {plotSync} forNode /> {/if} diff --git a/web/templates/monitoring/logs.tmpl b/web/templates/monitoring/logs.tmpl new file mode 100644 index 00000000..1613edc1 --- /dev/null +++ b/web/templates/monitoring/logs.tmpl @@ -0,0 +1,13 @@ +{{define "content"}} +
+{{end}} + +{{define "stylesheets"}} + +{{end}} +{{define "javascript"}} + + +{{end}}