mirror of
https://github.com/ClusterCockpit/cc-metric-store.git
synced 2026-02-02 01:31:46 +01:00
Add configuration options and enable dynamic memory management through cc-backend callback
This commit is contained in:
77
CLAUDE.md
Normal file
77
CLAUDE.md
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
cc-metric-store is an in-memory time-series database for HPC cluster metrics, part of the ClusterCockpit monitoring suite. Data is indexed by a hierarchical tree (cluster → host → socket/cpu/gpu) and accessed via selectors. The core storage engine lives in `cc-backend/pkg/metricstore`; this repo provides the HTTP API wrapper.
|
||||||
|
|
||||||
|
## Build Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make # Build binary, copy config template, create checkpoint dirs
|
||||||
|
make clean # Clean build cache and binary
|
||||||
|
make distclean # Also remove ./var and config.json
|
||||||
|
make swagger # Regenerate Swagger from source comments
|
||||||
|
make test # Run go build, go vet, go test
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
```bash
|
||||||
|
go test -v ./... # Run tests
|
||||||
|
go test -bench=. -race -v ./... # With benchmarks and race detector
|
||||||
|
```
|
||||||
|
|
||||||
|
Integration test scripts for manual API testing are located in `/endpoint-test-scripts/`.
|
||||||
|
|
||||||
|
## Running
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./cc-metric-store # Uses ./config.json
|
||||||
|
./cc-metric-store -config /path/to/config.json
|
||||||
|
./cc-metric-store -dev # Enable Swagger UI at /swagger/
|
||||||
|
./cc-metric-store -loglevel debug # debug|info|warn|err|crit
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
**Entry point:** `cmd/cc-metric-store/main.go`
|
||||||
|
- `run()` → parse flags, init logging/config, connect NATS
|
||||||
|
- `runServer()` → init metricstore from cc-backend, start HTTP server
|
||||||
|
|
||||||
|
**Key packages:**
|
||||||
|
- `internal/api/` - REST endpoints (query, write, free, debug, healthcheck) and JWT auth (Ed25519)
|
||||||
|
- `internal/config/` - Config loading and JSON schema validation
|
||||||
|
- External: `cc-backend/pkg/metricstore` - actual time-series storage engine
|
||||||
|
|
||||||
|
**API endpoints** (all support optional JWT auth):
|
||||||
|
- `GET /api/query/` - Query metrics with selectors
|
||||||
|
- `POST /api/write/` - Write metrics (InfluxDB line protocol)
|
||||||
|
- `POST /api/free/` - Free buffers up to timestamp
|
||||||
|
- `GET /api/debug/` - Dump internal state
|
||||||
|
- `GET /api/healthcheck/` - Node health status
|
||||||
|
|
||||||
|
## Selectors
|
||||||
|
|
||||||
|
Data is accessed via hierarchical selectors:
|
||||||
|
```
|
||||||
|
["cluster1", "host1", "cpu0"] # Specific CPU
|
||||||
|
["cluster1", "host1", ["cpu4", "cpu5"]] # Multiple CPUs
|
||||||
|
["cluster1", "host1"] # Entire node (all CPUs implied)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Config file structure (see `configs/config.json`):
|
||||||
|
- `main` - Server address, TLS certs, JWT public key, user/group for privilege drop
|
||||||
|
- `metrics` - Per-metric frequency and aggregation strategy (sum/avg/null)
|
||||||
|
- `metric-store` - Checkpoints, memory cap, retention, cleanup mode, NATS subscriptions
|
||||||
|
- `nats` - Optional NATS connection for receiving metrics
|
||||||
|
|
||||||
|
## Test JWT
|
||||||
|
|
||||||
|
For testing with JWT auth enabled:
|
||||||
|
```
|
||||||
|
eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw
|
||||||
|
```
|
||||||
@@ -19,6 +19,7 @@ import (
|
|||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/nats"
|
"github.com/ClusterCockpit/cc-lib/v2/nats"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/runtime"
|
"github.com/ClusterCockpit/cc-lib/v2/runtime"
|
||||||
|
"github.com/ClusterCockpit/cc-metric-store/internal/api"
|
||||||
"github.com/ClusterCockpit/cc-metric-store/internal/config"
|
"github.com/ClusterCockpit/cc-metric-store/internal/config"
|
||||||
"github.com/google/gops/agent"
|
"github.com/google/gops/agent"
|
||||||
)
|
)
|
||||||
@@ -43,14 +44,25 @@ func printVersion() {
|
|||||||
func runServer(ctx context.Context) error {
|
func runServer(ctx context.Context) error {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
// Initialize metric store if configuration is provided
|
mscfg := ccconf.GetPackageConfig("metrics")
|
||||||
mscfg := ccconf.GetPackageConfig("metric-store")
|
if mscfg == nil {
|
||||||
if mscfg != nil {
|
return fmt.Errorf("missing metrics configuration")
|
||||||
metricstore.Init(mscfg, &wg)
|
}
|
||||||
} else {
|
config.InitMetrics(mscfg)
|
||||||
|
|
||||||
|
mscfg = ccconf.GetPackageConfig("metric-store")
|
||||||
|
if mscfg == nil {
|
||||||
return fmt.Errorf("missing metricstore configuration")
|
return fmt.Errorf("missing metricstore configuration")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metricstore.Init(mscfg, config.GetMetrics(), &wg)
|
||||||
|
|
||||||
|
if config.Keys.BackendURL != "" {
|
||||||
|
ms := metricstore.GetMemoryStore()
|
||||||
|
ms.SetNodeProvider(api.NewBackendNodeProvider(config.Keys.BackendURL))
|
||||||
|
cclog.Infof("Node provider configured with backend URL: %s", config.Keys.BackendURL)
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize HTTP server
|
// Initialize HTTP server
|
||||||
srv, err := NewServer(version, commit, date)
|
srv, err := NewServer(version, commit, date)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
11
go.mod
11
go.mod
@@ -3,8 +3,8 @@ module github.com/ClusterCockpit/cc-metric-store
|
|||||||
go 1.24.0
|
go 1.24.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/ClusterCockpit/cc-backend v1.4.4-0.20260126082752-084d00cb0d0c
|
github.com/ClusterCockpit/cc-backend v1.4.4-0.20260128102127-0d857b49a256
|
||||||
github.com/ClusterCockpit/cc-lib/v2 v2.2.0
|
github.com/ClusterCockpit/cc-lib/v2 v2.2.1
|
||||||
github.com/golang-jwt/jwt/v4 v4.5.0
|
github.com/golang-jwt/jwt/v4 v4.5.0
|
||||||
github.com/google/gops v0.3.28
|
github.com/google/gops v0.3.28
|
||||||
github.com/influxdata/line-protocol/v2 v2.2.1
|
github.com/influxdata/line-protocol/v2 v2.2.1
|
||||||
@@ -34,7 +34,6 @@ require (
|
|||||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 // indirect
|
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 // indirect
|
github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 // indirect
|
||||||
github.com/aws/smithy-go v1.24.0 // indirect
|
github.com/aws/smithy-go v1.24.0 // indirect
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
|
||||||
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
||||||
github.com/go-openapi/jsonpointer v0.22.3 // indirect
|
github.com/go-openapi/jsonpointer v0.22.3 // indirect
|
||||||
github.com/go-openapi/jsonreference v0.21.3 // indirect
|
github.com/go-openapi/jsonreference v0.21.3 // indirect
|
||||||
@@ -53,19 +52,13 @@ require (
|
|||||||
github.com/nats-io/nats.go v1.47.0 // indirect
|
github.com/nats-io/nats.go v1.47.0 // indirect
|
||||||
github.com/nats-io/nkeys v0.4.12 // indirect
|
github.com/nats-io/nkeys v0.4.12 // indirect
|
||||||
github.com/nats-io/nuid v1.0.1 // indirect
|
github.com/nats-io/nuid v1.0.1 // indirect
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
|
||||||
github.com/swaggo/files v1.0.1 // indirect
|
github.com/swaggo/files v1.0.1 // indirect
|
||||||
github.com/urfave/cli/v2 v2.27.7 // indirect
|
|
||||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
|
|
||||||
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
|
||||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||||
golang.org/x/crypto v0.46.0 // indirect
|
golang.org/x/crypto v0.46.0 // indirect
|
||||||
golang.org/x/mod v0.31.0 // indirect
|
golang.org/x/mod v0.31.0 // indirect
|
||||||
golang.org/x/net v0.48.0 // indirect
|
golang.org/x/net v0.48.0 // indirect
|
||||||
golang.org/x/sync v0.19.0 // indirect
|
golang.org/x/sync v0.19.0 // indirect
|
||||||
golang.org/x/sys v0.39.0 // indirect
|
golang.org/x/sys v0.39.0 // indirect
|
||||||
golang.org/x/text v0.32.0 // indirect
|
|
||||||
golang.org/x/tools v0.40.0 // indirect
|
golang.org/x/tools v0.40.0 // indirect
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
||||||
sigs.k8s.io/yaml v1.6.0 // indirect
|
|
||||||
)
|
)
|
||||||
|
|||||||
20
go.sum
20
go.sum
@@ -1,7 +1,7 @@
|
|||||||
github.com/ClusterCockpit/cc-backend v1.4.4-0.20260126082752-084d00cb0d0c h1:rN1M3afMjlW4GUsa5jiR5OKA23IVpoeMrkbVlpk2sWw=
|
github.com/ClusterCockpit/cc-backend v1.4.4-0.20260128102127-0d857b49a256 h1:PL8UhUBe+G6j1JoXybx27eKhgVgQ+Z0fQnvbVD3OmGA=
|
||||||
github.com/ClusterCockpit/cc-backend v1.4.4-0.20260126082752-084d00cb0d0c/go.mod h1:RDlfymO/WgrcZ1eDxGpur2jTEFoMA8BfJUvV+Heb+E4=
|
github.com/ClusterCockpit/cc-backend v1.4.4-0.20260128102127-0d857b49a256/go.mod h1:y5LuqfWrSnVYjMaxseBwq72Tx4NpyQWwTHXwKsYAPUk=
|
||||||
github.com/ClusterCockpit/cc-lib/v2 v2.2.0 h1:gqMsh7zsJMUhaXviXzaZ3gqXcLVgerjRJHzIcwX4FmQ=
|
github.com/ClusterCockpit/cc-lib/v2 v2.2.1 h1:iCVas+Jc61zFH5S2VG3H1sc7tsn+U4lOJwUYjYZEims=
|
||||||
github.com/ClusterCockpit/cc-lib/v2 v2.2.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw=
|
github.com/ClusterCockpit/cc-lib/v2 v2.2.1/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw=
|
||||||
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
|
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
|
||||||
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
|
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
|
||||||
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
|
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
|
||||||
@@ -50,8 +50,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
|||||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
|
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
|
|
||||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
@@ -155,8 +153,6 @@ github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzM
|
|||||||
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
|
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
|
||||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
|
||||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
|
||||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
|
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
|
||||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY=
|
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
@@ -171,10 +167,6 @@ github.com/swaggo/http-swagger v1.3.4 h1:q7t/XLx0n15H1Q9/tk3Y9L4n210XzJF5WtnDX64
|
|||||||
github.com/swaggo/http-swagger v1.3.4/go.mod h1:9dAh0unqMBAlbp1uE2Uc2mQTxNMU/ha4UbucIg1MFkQ=
|
github.com/swaggo/http-swagger v1.3.4/go.mod h1:9dAh0unqMBAlbp1uE2Uc2mQTxNMU/ha4UbucIg1MFkQ=
|
||||||
github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI=
|
github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI=
|
||||||
github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg=
|
github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg=
|
||||||
github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU=
|
|
||||||
github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4=
|
|
||||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg=
|
|
||||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
|
|
||||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
|
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
|
||||||
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
|
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
|
||||||
@@ -214,8 +206,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
|||||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||||
golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
|
|
||||||
golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
|
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||||
@@ -233,5 +223,3 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
|
|||||||
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
|
|
||||||
sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
|
|
||||||
|
|||||||
54
internal/api/nodeprovider.go
Normal file
54
internal/api/nodeprovider.go
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-metric-store.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
	"encoding/json"
	"fmt"
	"net/http"
	"slices"
	"strings"
	"time"
)
|
||||||
|
|
||||||
|
// BackendNodeProvider implements metricstore.NodeProvider by querying
// the cc-backend /api/jobs/used_nodes endpoint.
type BackendNodeProvider struct {
	backendUrl string       // base URL of cc-backend, stored without trailing slashes
	client     *http.Client // reused for every request; carries the request timeout
}

// NewBackendNodeProvider creates a new BackendNodeProvider that queries
// the given cc-backend URL for used nodes information.
//
// Trailing slashes on backendUrl are stripped so that "https://host:8080"
// and "https://host:8080/" both yield the request path /api/jobs/used_nodes
// rather than //api/jobs/used_nodes.
func NewBackendNodeProvider(backendUrl string) *BackendNodeProvider {
	return &BackendNodeProvider{
		backendUrl: strings.TrimRight(backendUrl, "/"),
		client: &http.Client{
			// Bound every backend call so a stalled backend cannot block the caller forever.
			Timeout: 10 * time.Second,
		},
	}
}

// GetUsedNodes returns a map of cluster names to sorted lists of unique hostnames
// that are currently in use by jobs that started before the given timestamp.
//
// The timestamp is passed to the backend as the "ts" query parameter. An error
// is returned if the request fails, if the backend answers with any status
// other than 200 OK, or if the response body is not a JSON object of string
// arrays. The sorted/unique guarantee is enforced locally and does not depend
// on the ordering of the backend response.
func (p *BackendNodeProvider) GetUsedNodes(ts int64) (map[string][]string, error) {
	url := fmt.Sprintf("%s/api/jobs/used_nodes?ts=%d", p.backendUrl, ts)

	resp, err := p.client.Get(url)
	if err != nil {
		return nil, fmt.Errorf("querying used nodes from backend: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("backend returned status %d", resp.StatusCode)
	}

	var result map[string][]string
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("decoding used nodes response: %w", err)
	}

	// Normalize each host list so the documented contract (sorted, no
	// duplicates) holds regardless of what the backend sent. Updating the
	// value of an existing key while ranging over the map is safe in Go.
	for cluster, hosts := range result {
		slices.Sort(hosts)
		result[cluster] = slices.Compact(hosts)
	}

	return result, nil
}
|
||||||
@@ -10,57 +10,20 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
)
|
)
|
||||||
|
|
||||||
// For aggregation over multiple values at different cpus/sockets/..., not time!
|
var metrics map[string]metricstore.MetricConfig
|
||||||
type AggregationStrategy int
|
|
||||||
|
|
||||||
const (
|
|
||||||
NoAggregation AggregationStrategy = iota
|
|
||||||
SumAggregation
|
|
||||||
AvgAggregation
|
|
||||||
)
|
|
||||||
|
|
||||||
func (as *AggregationStrategy) UnmarshalJSON(data []byte) error {
|
|
||||||
var str string
|
|
||||||
if err := json.Unmarshal(data, &str); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
switch str {
|
|
||||||
case "":
|
|
||||||
*as = NoAggregation
|
|
||||||
case "sum":
|
|
||||||
*as = SumAggregation
|
|
||||||
case "avg":
|
|
||||||
*as = AvgAggregation
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("invalid aggregation strategy: %#v", str)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type MetricConfig struct {
|
|
||||||
// Interval in seconds at which measurements will arive.
|
|
||||||
Frequency int64 `json:"frequency"`
|
|
||||||
|
|
||||||
// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
|
|
||||||
Aggregation AggregationStrategy `json:"aggregation"`
|
|
||||||
|
|
||||||
// Private, used internally...
|
|
||||||
Offset int
|
|
||||||
}
|
|
||||||
|
|
||||||
var metrics map[string]MetricConfig
|
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Address string `json:"addr"`
|
Address string `json:"addr"`
|
||||||
CertFile string `json:"https-cert-file"`
|
CertFile string `json:"https-cert-file"`
|
||||||
KeyFile string `json:"https-key-file"`
|
KeyFile string `json:"https-key-file"`
|
||||||
User string `json:"user"`
|
User string `json:"user"`
|
||||||
Group string `json:"group"`
|
Group string `json:"group"`
|
||||||
Debug struct {
|
BackendURL string `json:"backend-url"`
|
||||||
|
Debug struct {
|
||||||
DumpToFile string `json:"dump-to-file"`
|
DumpToFile string `json:"dump-to-file"`
|
||||||
EnableGops bool `json:"gops"`
|
EnableGops bool `json:"gops"`
|
||||||
} `json:"debug"`
|
} `json:"debug"`
|
||||||
@@ -69,13 +32,32 @@ type Config struct {
|
|||||||
|
|
||||||
var Keys Config
|
var Keys Config
|
||||||
|
|
||||||
|
// metricConfigJSON is the intermediate decoding target for one entry of the
// "metrics" configuration. InitMetrics decodes the raw JSON into a map of
// these and then converts each entry into a metricstore.MetricConfig, turning
// the Aggregation string into a strategy via metricstore.AssignAggregationStrategy.
type metricConfigJSON struct {
|
||||||
|
// Frequency is read from the "frequency" key and copied unchanged into
// the resulting metricstore.MetricConfig.
Frequency int64 `json:"frequency"`
|
||||||
|
// Aggregation is the raw "aggregation" string exactly as it appears in the
// config file; it is parsed later rather than during decoding.
Aggregation string `json:"aggregation"`
|
||||||
|
}
|
||||||
|
|
||||||
func InitMetrics(metricConfig json.RawMessage) {
|
func InitMetrics(metricConfig json.RawMessage) {
|
||||||
Validate(metricConfigSchema, metricConfig)
|
Validate(metricConfigSchema, metricConfig)
|
||||||
|
|
||||||
|
var tempMetrics map[string]metricConfigJSON
|
||||||
dec := json.NewDecoder(bytes.NewReader(metricConfig))
|
dec := json.NewDecoder(bytes.NewReader(metricConfig))
|
||||||
dec.DisallowUnknownFields()
|
dec.DisallowUnknownFields()
|
||||||
if err := dec.Decode(&metrics); err != nil {
|
if err := dec.Decode(&tempMetrics); err != nil {
|
||||||
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", metricConfig, err.Error())
|
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", metricConfig, err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metrics = make(map[string]metricstore.MetricConfig)
|
||||||
|
for name, cfg := range tempMetrics {
|
||||||
|
agg, err := metricstore.AssignAggregationStrategy(cfg.Aggregation)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warnf("Could not parse aggregation strategy for metric '%s': %s", name, err.Error())
|
||||||
|
}
|
||||||
|
metrics[name] = metricstore.MetricConfig{
|
||||||
|
Frequency: cfg.Frequency,
|
||||||
|
Aggregation: agg,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func Init(mainConfig json.RawMessage) {
|
func Init(mainConfig json.RawMessage) {
|
||||||
@@ -93,3 +75,7 @@ func GetMetricFrequency(metricName string) (int64, error) {
|
|||||||
}
|
}
|
||||||
return 0, fmt.Errorf("metric %s not found", metricName)
|
return 0, fmt.Errorf("metric %s not found", metricName)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetMetrics returns the package-level metric configuration map that
// InitMetrics populates. The map itself is returned, not a copy, and it is
// nil until InitMetrics has run.
func GetMetrics() map[string]metricstore.MetricConfig {
|
||||||
|
return metrics
|
||||||
|
}
|
||||||
|
|||||||
@@ -29,6 +29,10 @@ var configSchema = `
|
|||||||
"description": "Drop root permissions once the port was taken. Only applicable if using privileged port.",
|
"description": "Drop root permissions once the port was taken. Only applicable if using privileged port.",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
"backend-url": {
|
||||||
|
"description": "URL of cc-backend for querying job information (e.g., 'https://localhost:8080').",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
"debug": {
|
"debug": {
|
||||||
"description": "Debug options.",
|
"description": "Debug options.",
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|||||||
Reference in New Issue
Block a user