Merge pull request #405 from ClusterCockpit/metricstore

Metricstore Integration
Jan Eitzinger
2025-09-10 09:17:33 +02:00
committed by GitHub
64 changed files with 4648 additions and 833 deletions

View File

@@ -7,7 +7,7 @@ jobs:
- name: Install Go - name: Install Go
uses: actions/setup-go@v4 uses: actions/setup-go@v4
with: with:
go-version: 1.24.x go-version: 1.25.x
- name: Checkout code - name: Checkout code
uses: actions/checkout@v3 uses: actions/checkout@v3
- name: Build, Vet & Test - name: Build, Vet & Test

5
.gitignore vendored
View File

@@ -9,6 +9,11 @@
/var/*.db /var/*.db
/var/*.txt /var/*.txt
/var/checkpoints*
migrateTimestamps.pl
test_ccms_write_api.sh
/web/frontend/public/build /web/frontend/public/build
/web/frontend/node_modules /web/frontend/node_modules

View File

@@ -38,7 +38,7 @@ type Job {
numAcc: Int! numAcc: Int!
energy: Float! energy: Float!
SMT: Int! SMT: Int!
exclusive: Int! shared: String!
partition: String! partition: String!
arrayJobId: Int! arrayJobId: Int!
monitoringStatus: Int! monitoringStatus: Int!
@@ -425,7 +425,7 @@ input JobFilter {
startTime: TimeRange startTime: TimeRange
state: [JobState!] state: [JobState!]
metricStats: [MetricStatItem!] metricStats: [MetricStatItem!]
exclusive: Int shared: String
node: StringInput node: StringInput
} }

View File

@@ -1394,12 +1394,6 @@
"format": "float64" "format": "float64"
} }
}, },
"exclusive": {
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": { "footprint": {
"type": "object", "type": "object",
"additionalProperties": { "additionalProperties": {
@@ -1416,12 +1410,18 @@
}, },
"jobState": { "jobState": {
"enum": [ "enum": [
"completed", "boot_fail",
"failed",
"cancelled", "cancelled",
"stopped", "completed",
"timeout", "deadline",
"out_of_memory" "failed",
"node_fail",
"out-of-memory",
"pending",
"preempted",
"running",
"suspended",
"timeout"
], ],
"allOf": [ "allOf": [
{ {
@@ -1477,6 +1477,14 @@
"$ref": "#/definitions/schema.Resource" "$ref": "#/definitions/schema.Resource"
} }
}, },
"shared": {
"type": "string",
"enum": [
"none",
"single_user",
"multi_user"
]
},
"smt": { "smt": {
"type": "integer", "type": "integer",
"example": 4 "example": 4

View File

@@ -207,11 +207,6 @@ definitions:
format: float64 format: float64
type: number type: number
type: object type: object
exclusive:
example: 1
maximum: 2
minimum: 0
type: integer
footprint: footprint:
additionalProperties: additionalProperties:
format: float64 format: float64
@@ -226,12 +221,18 @@ definitions:
allOf: allOf:
- $ref: '#/definitions/schema.JobState' - $ref: '#/definitions/schema.JobState'
enum: enum:
- completed - boot_fail
- failed
- cancelled - cancelled
- stopped - completed
- deadline
- failed
- node_fail
- out-of-memory
- pending
- preempted
- running
- suspended
- timeout - timeout
- out_of_memory
example: completed example: completed
metaData: metaData:
additionalProperties: additionalProperties:
@@ -269,6 +270,12 @@ definitions:
items: items:
$ref: '#/definitions/schema.Resource' $ref: '#/definitions/schema.Resource'
type: array type: array
shared:
enum:
- none
- single_user
- multi_user
type: string
smt: smt:
example: 4 example: 4
type: integer type: integer

View File

@@ -18,6 +18,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/importer" "github.com/ClusterCockpit/cc-backend/internal/importer"
"github.com/ClusterCockpit/cc-backend/internal/memorystore"
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/tagger" "github.com/ClusterCockpit/cc-backend/internal/tagger"
@@ -96,6 +97,12 @@ func main() {
} else { } else {
cclog.Abort("Cluster configuration must be present") cclog.Abort("Cluster configuration must be present")
} }
if mscfg := ccconf.GetPackageConfig("metric-store"); mscfg != nil {
config.InitMetricStore(mscfg)
} else {
cclog.Abort("Metric Store configuration must be present")
}
} else { } else {
cclog.Abort("Main configuration must be present") cclog.Abort("Main configuration must be present")
} }
@@ -201,7 +208,7 @@ func main() {
if archiveCfg := ccconf.GetPackageConfig("archive"); archiveCfg != nil { if archiveCfg := ccconf.GetPackageConfig("archive"); archiveCfg != nil {
err = archive.Init(archiveCfg, config.Keys.DisableArchive) err = archive.Init(archiveCfg, config.Keys.DisableArchive)
} else { } else {
err = archive.Init(json.RawMessage(`{\"kind\":\"file\",\"path\":\"./var/job-archive\"}`), config.Keys.DisableArchive) err = archive.Init(json.RawMessage("{\"kind\":\"file\",\"path\":\"./var/job-archive\"}"), config.Keys.DisableArchive)
} }
if err != nil { if err != nil {
cclog.Abortf("Init: Failed to initialize archive.\nError: %s\n", err.Error()) cclog.Abortf("Init: Failed to initialize archive.\nError: %s\n", err.Error())
@@ -241,13 +248,18 @@ func main() {
cclog.Exit("No errors, server flag not set. Exiting cc-backend.") cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
} }
var wg sync.WaitGroup
// Metric Store starts after all flags have been processed
memorystore.Init(&wg)
archiver.Start(repository.GetJobRepository()) archiver.Start(repository.GetJobRepository())
// // Comment out
taskManager.Start(ccconf.GetPackageConfig("cron"), taskManager.Start(ccconf.GetPackageConfig("cron"),
ccconf.GetPackageConfig("archive")) ccconf.GetPackageConfig("archive"))
serverInit()
var wg sync.WaitGroup serverInit()
wg.Add(1) wg.Add(1)
go func() { go func() {

View File

@@ -26,6 +26,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph" "github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated" "github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/memorystore"
"github.com/ClusterCockpit/cc-backend/internal/routerConfig" "github.com/ClusterCockpit/cc-backend/internal/routerConfig"
"github.com/ClusterCockpit/cc-backend/web" "github.com/ClusterCockpit/cc-backend/web"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger" cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
@@ -118,6 +119,7 @@ func serverInit() {
userapi := router.PathPrefix("/userapi").Subrouter() userapi := router.PathPrefix("/userapi").Subrouter()
configapi := router.PathPrefix("/config").Subrouter() configapi := router.PathPrefix("/config").Subrouter()
frontendapi := router.PathPrefix("/frontend").Subrouter() frontendapi := router.PathPrefix("/frontend").Subrouter()
metricstoreapi := router.PathPrefix("/metricstore").Subrouter()
if !config.Keys.DisableAuthentication { if !config.Keys.DisableAuthentication {
router.Handle("/login", authHandle.Login( router.Handle("/login", authHandle.Login(
@@ -198,6 +200,14 @@ func serverInit() {
onFailureResponse) onFailureResponse)
}) })
metricstoreapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthMetricStoreApi(
// On success;
next,
// On failure: JSON Response
onFailureResponse)
})
configapi.Use(func(next http.Handler) http.Handler { configapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthConfigApi( return authHandle.AuthConfigApi(
// On success; // On success;
@@ -231,6 +241,7 @@ func serverInit() {
routerConfig.SetupRoutes(secured, buildInfo) routerConfig.SetupRoutes(secured, buildInfo)
apiHandle.MountApiRoutes(securedapi) apiHandle.MountApiRoutes(securedapi)
apiHandle.MountUserApiRoutes(userapi) apiHandle.MountUserApiRoutes(userapi)
apiHandle.MountMetricStoreApiRoutes(metricstoreapi)
apiHandle.MountConfigApiRoutes(configapi) apiHandle.MountConfigApiRoutes(configapi)
apiHandle.MountFrontendApiRoutes(frontendapi) apiHandle.MountFrontendApiRoutes(frontendapi)
@@ -325,6 +336,9 @@ func serverShutdown() {
// First shut down the server gracefully (waiting for all ongoing requests) // First shut down the server gracefully (waiting for all ongoing requests)
server.Shutdown(context.Background()) server.Shutdown(context.Background())
//Archive all the metric store data
memorystore.Shutdown()
// Then, wait for any async archivings still pending... // Then, wait for any async archivings still pending...
archiver.WaitForArchiving() archiver.WaitForArchiving()
} }

View File

@@ -4,11 +4,27 @@
"short-running-jobs-duration": 300, "short-running-jobs-duration": 300,
"resampling": { "resampling": {
"trigger": 30, "trigger": 30,
"resolutions": [600, 300, 120, 60] "resolutions": [
600,
300,
120,
60
]
}, },
"apiAllowedIPs": ["*"], "apiAllowedIPs": [
"*"
],
"emission-constant": 317 "emission-constant": 317
}, },
"cron": {
"commit-job-worker": "2m",
"duration-worker": "5m",
"footprint-worker": "10m"
},
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"auth": { "auth": {
"jwts": { "jwts": {
"max-age": "2000h" "max-age": "2000h"
@@ -18,9 +34,7 @@
{ {
"name": "fritz", "name": "fritz",
"metricDataRepository": { "metricDataRepository": {
"kind": "cc-metric-store", "kind": "cc-metric-store"
"url": "http://localhost:8082",
"token": ""
}, },
"filterRanges": { "filterRanges": {
"numNodes": { "numNodes": {
@@ -40,9 +54,7 @@
{ {
"name": "alex", "name": "alex",
"metricDataRepository": { "metricDataRepository": {
"kind": "cc-metric-store", "kind": "cc-metric-store"
"url": "http://localhost:8082",
"token": ""
}, },
"filterRanges": { "filterRanges": {
"numNodes": { "numNodes": {
@@ -59,5 +71,18 @@
} }
} }
} }
] ],
} "metric-store": {
"checkpoints": {
"file-format": "avro",
"interval": "2h",
"directory": "./var/checkpoints",
"restore": "48h"
},
"archive": {
"interval": "2h",
"directory": "./var/archive"
},
"retention-in-memory": "48h"
}
}

View File

@@ -6,13 +6,29 @@
"user": "clustercockpit", "user": "clustercockpit",
"group": "clustercockpit", "group": "clustercockpit",
"validate": false, "validate": false,
"apiAllowedIPs": ["*"], "apiAllowedIPs": [
"*"
],
"short-running-jobs-duration": 300, "short-running-jobs-duration": 300,
"resampling": { "resampling": {
"trigger": 30, "trigger": 30,
"resolutions": [600, 300, 120, 60] "resolutions": [
600,
300,
120,
60
]
} }
}, },
"cron": {
"commit-job-worker": "2m",
"duration-worker": "5m",
"footprint-worker": "10m"
},
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"clusters": [ "clusters": [
{ {
"name": "test", "name": "test",
@@ -37,4 +53,4 @@
} }
} }
] ]
} }

28
go.mod
View File

@@ -6,10 +6,10 @@ toolchain go1.24.1
require ( require (
github.com/99designs/gqlgen v0.17.78 github.com/99designs/gqlgen v0.17.78
github.com/ClusterCockpit/cc-lib v0.7.0 github.com/ClusterCockpit/cc-lib v0.8.0
github.com/Masterminds/squirrel v1.5.4 github.com/Masterminds/squirrel v1.5.4
github.com/coreos/go-oidc/v3 v3.12.0 github.com/coreos/go-oidc/v3 v3.12.0
github.com/expr-lang/expr v1.17.5 github.com/expr-lang/expr v1.17.6
github.com/go-co-op/gocron/v2 v2.16.0 github.com/go-co-op/gocron/v2 v2.16.0
github.com/go-ldap/ldap/v3 v3.4.10 github.com/go-ldap/ldap/v3 v3.4.10
github.com/go-sql-driver/mysql v1.9.0 github.com/go-sql-driver/mysql v1.9.0
@@ -19,19 +19,22 @@ require (
github.com/gorilla/handlers v1.5.2 github.com/gorilla/handlers v1.5.2
github.com/gorilla/mux v1.8.1 github.com/gorilla/mux v1.8.1
github.com/gorilla/sessions v1.4.0 github.com/gorilla/sessions v1.4.0
github.com/influxdata/line-protocol/v2 v2.2.1
github.com/jmoiron/sqlx v1.4.0 github.com/jmoiron/sqlx v1.4.0
github.com/joho/godotenv v1.5.1 github.com/joho/godotenv v1.5.1
github.com/linkedin/goavro/v2 v2.14.0
github.com/mattn/go-sqlite3 v1.14.24 github.com/mattn/go-sqlite3 v1.14.24
github.com/prometheus/client_golang v1.23.0 github.com/nats-io/nats.go v1.45.0
github.com/prometheus/common v0.65.0 github.com/prometheus/client_golang v1.23.2
github.com/prometheus/common v0.66.1
github.com/qustavo/sqlhooks/v2 v2.1.0 github.com/qustavo/sqlhooks/v2 v2.1.0
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
github.com/swaggo/http-swagger v1.3.4 github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.16.6 github.com/swaggo/swag v1.16.6
github.com/vektah/gqlparser/v2 v2.5.30 github.com/vektah/gqlparser/v2 v2.5.30
golang.org/x/crypto v0.40.0 golang.org/x/crypto v0.41.0
golang.org/x/oauth2 v0.30.0 golang.org/x/oauth2 v0.30.0
golang.org/x/time v0.5.0 golang.org/x/time v0.12.0
) )
require ( require (
@@ -51,6 +54,7 @@ require (
github.com/go-openapi/spec v0.21.0 // indirect github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/swag v0.23.1 // indirect github.com/go-openapi/swag v0.23.1 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/uuid v1.6.0 // indirect github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/securecookie v1.1.2 // indirect github.com/gorilla/securecookie v1.1.2 // indirect
github.com/gorilla/websocket v1.5.3 // indirect github.com/gorilla/websocket v1.5.3 // indirect
@@ -61,6 +65,7 @@ require (
github.com/josharian/intern v1.0.0 // indirect github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect
@@ -68,6 +73,8 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/nats-io/nkeys v0.4.11 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/procfs v0.16.1 // indirect github.com/prometheus/procfs v0.16.1 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect
@@ -80,13 +87,12 @@ require (
go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
golang.org/x/mod v0.26.0 // indirect golang.org/x/mod v0.26.0 // indirect
golang.org/x/net v0.42.0 // indirect golang.org/x/net v0.43.0 // indirect
golang.org/x/sync v0.16.0 // indirect golang.org/x/sync v0.16.0 // indirect
golang.org/x/sys v0.34.0 // indirect golang.org/x/sys v0.35.0 // indirect
golang.org/x/text v0.27.0 // indirect golang.org/x/text v0.28.0 // indirect
golang.org/x/tools v0.35.0 // indirect golang.org/x/tools v0.35.0 // indirect
google.golang.org/protobuf v1.36.6 // indirect google.golang.org/protobuf v1.36.8 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.6.0 // indirect sigs.k8s.io/yaml v1.6.0 // indirect
) )

79
go.sum
View File

@@ -6,16 +6,16 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/ClusterCockpit/cc-lib v0.7.0 h1:THuSYrMcn9pSbrMditSI1LMOluq9TnM0/aVId4uK1Hc= github.com/ClusterCockpit/cc-lib v0.8.0 h1:kQRMOx30CJCy+Q6TgCK9rarJnJ/CKZPWlIEdIXYlxoA=
github.com/ClusterCockpit/cc-lib v0.7.0/go.mod h1:TD1PS8pL2RDvEWaqs8VNejoTSm5OawI9Dcc0CTY/yWQ= github.com/ClusterCockpit/cc-lib v0.8.0/go.mod h1:5xTwONu9pSp15mJ9CjBKGU9I3Jad8NfhrVHJZl50/yI=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM=
github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10= github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0= github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo= github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo=
github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y= github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM= github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
@@ -38,6 +38,7 @@ github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc
github.com/coreos/go-oidc/v3 v3.12.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0= github.com/coreos/go-oidc/v3 v3.12.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0=
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -53,10 +54,14 @@ github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/expr-lang/expr v1.17.5 h1:i1WrMvcdLF249nSNlpQZN1S6NXuW9WaOfF5tPi3aw3k= github.com/expr-lang/expr v1.17.6 h1:1h6i8ONk9cexhDmowO/A64VPxHScu7qfSl2k8OlINec=
github.com/expr-lang/expr v1.17.5/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= github.com/expr-lang/expr v1.17.6/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl535dDk= github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl535dDk=
@@ -91,6 +96,11 @@ github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeD
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/golang-migrate/migrate/v4 v4.18.2 h1:2VSCMz7x7mjyTXx3m2zPokOY82LTRgxK1yQYKo6wWQ8= github.com/golang-migrate/migrate/v4 v4.18.2 h1:2VSCMz7x7mjyTXx3m2zPokOY82LTRgxK1yQYKo6wWQ8=
github.com/golang-migrate/migrate/v4 v4.18.2/go.mod h1:2CM6tJvn2kqPXwnXO/d3rAQYiyoIm180VsO8PRX6Rpk= github.com/golang-migrate/migrate/v4 v4.18.2/go.mod h1:2CM6tJvn2kqPXwnXO/d3rAQYiyoIm180VsO8PRX6Rpk=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
@@ -127,6 +137,11 @@ github.com/influxdata/influxdb-client-go/v2 v2.14.0 h1:AjbBfJuq+QoaXNcrova8smSjw
github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI= github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU= github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo=
github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY=
github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY=
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE= github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM= github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
@@ -155,8 +170,11 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw= github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw=
@@ -166,6 +184,8 @@ github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6Fm
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/linkedin/goavro/v2 v2.14.0 h1:aNO/js65U+Mwq4yB5f1h01c3wiM458qtRad1DN0CMUI=
github.com/linkedin/goavro/v2 v2.14.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
@@ -187,12 +207,13 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/nats-io/nats.go v1.44.0 h1:ECKVrDLdh/kDPV1g0gAQ+2+m2KprqZK5O/eJAyAnH2M= github.com/nats-io/nats.go v1.45.0 h1:/wGPbnYXDM0pLKFjZTX+2JOw9TQPoIgTFrUaH97giwA=
github.com/nats-io/nats.go v1.44.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= github.com/nats-io/nats.go v1.45.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0= github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0=
github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE= github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@@ -204,12 +225,12 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0= github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0=
@@ -233,10 +254,11 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE= github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg= github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg=
github.com/swaggo/http-swagger v1.3.4 h1:q7t/XLx0n15H1Q9/tk3Y9L4n210XzJF5WtnDX64a5ww= github.com/swaggo/http-swagger v1.3.4 h1:q7t/XLx0n15H1Q9/tk3Y9L4n210XzJF5WtnDX64a5ww=
@@ -273,8 +295,8 @@ golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliY
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
@@ -295,8 +317,8 @@ golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -319,8 +341,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -339,10 +361,10 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
@@ -352,15 +374,16 @@ golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxb
golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0=
golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=

View File

@@ -241,7 +241,7 @@ func TestRestApi(t *testing.T) {
"numNodes": 1, "numNodes": 1,
"numHwthreads": 8, "numHwthreads": 8,
"numAcc": 0, "numAcc": 0,
"exclusive": 1, "shared": "none",
"monitoringStatus": 1, "monitoringStatus": 1,
"smt": 1, "smt": 1,
"resources": [ "resources": [
@@ -396,7 +396,7 @@ func TestRestApi(t *testing.T) {
"partition": "default", "partition": "default",
"walltime": 3600, "walltime": 3600,
"numNodes": 1, "numNodes": 1,
"exclusive": 1, "shared": "none",
"monitoringStatus": 1, "monitoringStatus": 1,
"smt": 1, "smt": 1,
"resources": [ "resources": [

View File

@@ -1401,12 +1401,6 @@ const docTemplate = `{
"format": "float64" "format": "float64"
} }
}, },
"exclusive": {
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": { "footprint": {
"type": "object", "type": "object",
"additionalProperties": { "additionalProperties": {
@@ -1423,12 +1417,18 @@ const docTemplate = `{
}, },
"jobState": { "jobState": {
"enum": [ "enum": [
"completed", "boot_fail",
"failed",
"cancelled", "cancelled",
"stopped", "completed",
"timeout", "deadline",
"out_of_memory" "failed",
"node_fail",
"out-of-memory",
"pending",
"preempted",
"running",
"suspended",
"timeout"
], ],
"allOf": [ "allOf": [
{ {
@@ -1484,6 +1484,14 @@ const docTemplate = `{
"$ref": "#/definitions/schema.Resource" "$ref": "#/definitions/schema.Resource"
} }
}, },
"shared": {
"type": "string",
"enum": [
"none",
"single_user",
"multi_user"
]
},
"smt": { "smt": {
"type": "integer", "type": "integer",
"example": 4 "example": 4

View File

@@ -15,6 +15,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/memorystore"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger" cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema" "github.com/ClusterCockpit/cc-lib/schema"
@@ -95,6 +96,19 @@ func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet) r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
} }
func (api *RestApi) MountMetricStoreApiRoutes(r *mux.Router) {
// REST API Uses TokenAuth
r.HandleFunc("/api/free", memorystore.HandleFree).Methods(http.MethodPost)
r.HandleFunc("/api/write", memorystore.HandleWrite).Methods(http.MethodPost)
r.HandleFunc("/api/debug", memorystore.HandleDebug).Methods(http.MethodGet)
r.HandleFunc("/api/healthcheck", memorystore.HandleHealthCheck).Methods(http.MethodGet)
// Refactor
r.HandleFunc("/api/free/", memorystore.HandleFree).Methods(http.MethodPost)
r.HandleFunc("/api/write/", memorystore.HandleWrite).Methods(http.MethodPost)
r.HandleFunc("/api/debug/", memorystore.HandleDebug).Methods(http.MethodGet)
r.HandleFunc("/api/healthcheck/", memorystore.HandleHealthCheck).Methods(http.MethodGet)
}
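Not part of this commit: a minimal Go client sketch for the new /metricstore endpoints mounted above. It assumes cc-backend listens on localhost:8080, that the write handler ingests InfluxDB line protocol in the request body (the PR adds influxdata/line-protocol/v2 as a dependency, but the payload format is not shown in this diff), and that the cluster is passed as a query parameter; the JWT must carry the api role, as enforced by AuthMetricStoreApi.

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// Hypothetical metric line in InfluxDB line protocol (payload format assumed).
	payload := bytes.NewBufferString("cpu_load,cluster=fritz,hostname=f0101 value=1.5 1757487453\n")

	req, err := http.NewRequest(http.MethodPost,
		"http://localhost:8080/metricstore/api/write?cluster=fritz", payload)
	if err != nil {
		panic(err)
	}
	// AuthMetricStoreApi validates a JWT, so send it as a Bearer token.
	req.Header.Set("Authorization", "Bearer <JWT with api role>")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}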
func (api *RestApi) MountConfigApiRoutes(r *mux.Router) { func (api *RestApi) MountConfigApiRoutes(r *mux.Router) {
r.StrictSlash(true) r.StrictSlash(true)
// Settings Frontend Uses SessionAuth // Settings Frontend Uses SessionAuth

View File

@@ -417,6 +417,42 @@ func (auth *Authentication) AuthUserApi(
}) })
} }
func (auth *Authentication) AuthMetricStoreApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
cclog.Infof("auth metricstore api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil {
switch {
case len(user.Roles) == 1:
if user.HasRole(schema.RoleApi) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
case len(user.Roles) >= 2:
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
default:
cclog.Info("auth metricstore api -> authentication failed: missing role")
onfailure(rw, r, errors.New("unauthorized"))
}
}
cclog.Info("auth metricstore api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) AuthConfigApi( func (auth *Authentication) AuthConfigApi(
onsuccess http.Handler, onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error), onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),

View File

@@ -0,0 +1,475 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package avro
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"log"
"os"
"path"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/linkedin/goavro/v2"
)
var NumWorkers int = 4
var ErrNoNewData error = errors.New("no data in the pool")
func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) {
levels := make([]*AvroLevel, 0)
selectors := make([][]string, 0)
as.root.lock.RLock()
// Cluster
for sel1, l1 := range as.root.children {
l1.lock.RLock()
// Node
for sel2, l2 := range l1.children {
l2.lock.RLock()
// Frequency
for sel3, l3 := range l2.children {
levels = append(levels, l3)
selectors = append(selectors, []string{sel1, sel2, sel3})
}
l2.lock.RUnlock()
}
l1.lock.RUnlock()
}
as.root.lock.RUnlock()
type workItem struct {
level *AvroLevel
dir string
selector []string
}
n, errs := int32(0), int32(0)
var wg sync.WaitGroup
wg.Add(NumWorkers)
work := make(chan workItem, NumWorkers*2)
for range NumWorkers {
go func() {
defer wg.Done()
for workItem := range work {
from := getTimestamp(workItem.dir)
if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil {
if err == ErrNoNewData {
continue
}
log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error())
atomic.AddInt32(&errs, 1)
} else {
atomic.AddInt32(&n, 1)
}
}
}()
}
for i := range len(levels) {
dir := path.Join(dir, path.Join(selectors[i]...))
work <- workItem{
level: levels[i],
dir: dir,
selector: selectors[i],
}
}
close(work)
wg.Wait()
if errs > 0 {
return int(n), fmt.Errorf("%d errors happened while creating avro checkpoints (%d successes)", errs, n)
}
return int(n), nil
}
// getTimestamp returns the newest checkpoint timestamp found for dir.
// Avro checkpoint files are named "<resolution>_<epoch>.avro"; the parent directory
// is scanned, the epoch is parsed from each matching file name, and the maximum is
// returned. 0 is returned if no checkpoint exists or the newest one is older than
// one checkpoint interval plus the buffer window.
func getTimestamp(dir string) int64 {
resolution := path.Base(dir)
dir = path.Dir(dir)
files, err := os.ReadDir(dir)
if err != nil {
return 0
}
var maxTs int64 = 0
if len(files) == 0 {
return 0
}
for _, file := range files {
if file.IsDir() {
continue
}
name := file.Name()
if len(name) < 5 || !strings.HasSuffix(name, ".avro") || !strings.HasPrefix(name, resolution+"_") {
continue
}
ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
if err != nil {
fmt.Printf("error while parsing timestamp: %s\n", err.Error())
continue
}
if ts > maxTs {
maxTs = ts
}
}
interval, _ := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Interval)
updateTime := time.Unix(maxTs, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix()
if updateTime < time.Now().Unix() {
return 0
}
return maxTs
}
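For reference (not in this commit): checkpoint files written by toCheckpoint below are named "<resolution>_<epochSeconds>.avro", and getTimestamp keeps the largest epoch among them. A standalone sketch of that file-name parse, using a hypothetical file name:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	// Hypothetical checkpoint file: 60s resolution, written at epoch 1725955200.
	name := "60_1725955200.avro"
	// Same slicing as getTimestamp: everything between the first '_' and the ".avro" suffix.
	ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
	if err != nil {
		panic(err)
	}
	fmt.Println(ts) // 1725955200
}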
func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
l.lock.Lock()
defer l.lock.Unlock()
// fmt.Printf("Checkpointing directory: %s\n", dir)
// filepath contains the resolution
int_res, _ := strconv.Atoi(path.Base(dir))
// find smallest overall timestamp in l.data map and delete it from l.data
minTs := int64(1<<63 - 1)
for ts, dat := range l.data {
if ts < minTs && len(dat) != 0 {
minTs = ts
}
}
if from == 0 && minTs != int64(1<<63-1) {
from = minTs
}
if from == 0 {
return ErrNoNewData
}
var schema string
var codec *goavro.Codec
record_list := make([]map[string]any, 0)
var f *os.File
filePath := dir + fmt.Sprintf("_%d.avro", from)
var err error
fp_, err_ := os.Stat(filePath)
if errors.Is(err_, os.ErrNotExist) {
err = os.MkdirAll(path.Dir(dir), 0o755)
if err != nil {
return fmt.Errorf("failed to create directory: %v", err)
}
} else if fp_.Size() != 0 {
f, err = os.Open(filePath)
if err != nil {
return fmt.Errorf("failed to open existing avro file: %v", err)
}
br := bufio.NewReader(f)
reader, err := goavro.NewOCFReader(br)
if err != nil {
return fmt.Errorf("failed to create OCF reader: %v", err)
}
codec = reader.Codec()
schema = codec.Schema()
f.Close()
}
time_ref := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()
if dumpAll {
time_ref = time.Now().Unix()
}
// Empty values
if len(l.data) == 0 {
// we checkpoint avro files every 60 seconds
repeat := 60 / int_res
for range repeat {
record_list = append(record_list, make(map[string]any))
}
}
readFlag := true
for ts := range l.data {
flag := false
if ts < time_ref {
data := l.data[ts]
schema_gen, err := generateSchema(data)
if err != nil {
return err
}
flag, schema, err = compareSchema(schema, schema_gen)
if err != nil {
return fmt.Errorf("failed to compare read and generated schema: %v", err)
}
if flag && readFlag && !errors.Is(err_, os.ErrNotExist) {
f.Close()
f, err = os.Open(filePath)
if err != nil {
return fmt.Errorf("failed to open Avro file: %v", err)
}
br := bufio.NewReader(f)
ocfReader, err := goavro.NewOCFReader(br)
if err != nil {
return fmt.Errorf("failed to create OCF reader while changing schema: %v", err)
}
for ocfReader.Scan() {
record, err := ocfReader.Read()
if err != nil {
return fmt.Errorf("failed to read record: %v", err)
}
record_list = append(record_list, record.(map[string]any))
}
f.Close()
err = os.Remove(filePath)
if err != nil {
return fmt.Errorf("failed to delete file: %v", err)
}
readFlag = false
}
codec, err = goavro.NewCodec(schema)
if err != nil {
return fmt.Errorf("failed to create codec after merged schema: %v", err)
}
record_list = append(record_list, generateRecord(data))
delete(l.data, ts)
}
}
if len(record_list) == 0 {
return ErrNoNewData
}
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
if err != nil {
return fmt.Errorf("failed to append new avro file: %v", err)
}
// fmt.Printf("Codec : %#v\n", codec)
writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
W: f,
Codec: codec,
CompressionName: goavro.CompressionDeflateLabel,
})
if err != nil {
return fmt.Errorf("failed to create OCF writer: %v", err)
}
// Append the new record
if err := writer.Append(record_list); err != nil {
return fmt.Errorf("failed to append record: %v", err)
}
f.Close()
return nil
}
func compareSchema(schemaRead, schemaGen string) (bool, string, error) {
var genSchema, readSchema AvroSchema
if schemaRead == "" {
return false, schemaGen, nil
}
// Unmarshal the schema strings into AvroSchema structs
if err := json.Unmarshal([]byte(schemaGen), &genSchema); err != nil {
return false, "", fmt.Errorf("failed to parse generated schema: %v", err)
}
if err := json.Unmarshal([]byte(schemaRead), &readSchema); err != nil {
return false, "", fmt.Errorf("failed to parse read schema: %v", err)
}
sort.Slice(genSchema.Fields, func(i, j int) bool {
return genSchema.Fields[i].Name < genSchema.Fields[j].Name
})
sort.Slice(readSchema.Fields, func(i, j int) bool {
return readSchema.Fields[i].Name < readSchema.Fields[j].Name
})
// Check if schemas are identical
schemasEqual := true
if len(genSchema.Fields) <= len(readSchema.Fields) {
for i := range genSchema.Fields {
if genSchema.Fields[i].Name != readSchema.Fields[i].Name {
schemasEqual = false
break
}
}
// If schemas are identical, return the read schema
if schemasEqual {
return false, schemaRead, nil
}
}
// Create a map to hold unique fields from both schemas
fieldMap := make(map[string]AvroField)
// Add fields from the read schema
for _, field := range readSchema.Fields {
fieldMap[field.Name] = field
}
// Add or update fields from the generated schema
for _, field := range genSchema.Fields {
fieldMap[field.Name] = field
}
// Create a union schema by collecting fields from the map
var mergedFields []AvroField
for _, field := range fieldMap {
mergedFields = append(mergedFields, field)
}
// Sort fields by name for consistency
sort.Slice(mergedFields, func(i, j int) bool {
return mergedFields[i].Name < mergedFields[j].Name
})
// Create the merged schema
mergedSchema := AvroSchema{
Type: "record",
Name: genSchema.Name,
Fields: mergedFields,
}
// Check if schemas are identical
schemasEqual = len(mergedSchema.Fields) == len(readSchema.Fields)
if schemasEqual {
for i := range mergedSchema.Fields {
if mergedSchema.Fields[i].Name != readSchema.Fields[i].Name {
schemasEqual = false
break
}
}
if schemasEqual {
return false, schemaRead, nil
}
}
// Marshal the merged schema back to JSON
mergedSchemaJson, err := json.Marshal(mergedSchema)
if err != nil {
return false, "", fmt.Errorf("failed to marshal merged schema: %v", err)
}
return true, string(mergedSchemaJson), nil
}
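// Illustrative sketch (not part of this PR), e.g. for a _test.go file in this
// package: how compareSchema merges two record schemas. Field sets {a,b} and
// {b,c} yield the union {a,b,c}; the returned flag signals that the existing
// checkpoint must be re-encoded with the merged schema. Field names are
// hypothetical.
func exampleCompareSchema() (bool, string, error) {
read := `{"type":"record","name":"DataRecord","fields":[` +
`{"name":"a","type":"double","default":-1},{"name":"b","type":"double","default":-1}]}`
gen := `{"type":"record","name":"DataRecord","fields":[` +
`{"name":"b","type":"double","default":-1},{"name":"c","type":"double","default":-1}]}`
// changed == true, merged lists the fields a, b and c sorted by name.
return compareSchema(read, gen)
}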
func generateSchema(data map[string]schema.Float) (string, error) {
// Define the Avro schema structure
schema := map[string]any{
"type": "record",
"name": "DataRecord",
"fields": []map[string]any{},
}
fieldTracker := make(map[string]struct{})
for key := range data {
if _, exists := fieldTracker[key]; !exists {
key = correctKey(key)
field := map[string]any{
"name": key,
"type": "double",
"default": -1.0,
}
schema["fields"] = append(schema["fields"].([]map[string]any), field)
fieldTracker[key] = struct{}{}
}
}
schemaString, err := json.Marshal(schema)
if err != nil {
return "", fmt.Errorf("failed to marshal schema: %v", err)
}
return string(schemaString), nil
}
func generateRecord(data map[string]schema.Float) map[string]any {
record := make(map[string]any)
// Iterate through each map in data
for key, value := range data {
key = correctKey(key)
// Set the value in the record
// avro only accepts basic types
record[key] = value.Double()
}
return record
}
func correctKey(key string) string {
// Avro field names may not contain ':' or '.', so encode them with
// unambiguous placeholders that ReplaceKey can reverse later.
key = strings.ReplaceAll(key, ":", "___")
key = strings.ReplaceAll(key, ".", "__")
return key
}
func ReplaceKey(key string) string {
// Inverse of correctKey: restore ':' and '.' in metric keys that were
// sanitized for use as Avro field names.
key = strings.ReplaceAll(key, "___", ":")
key = strings.ReplaceAll(key, "__", ".")
return key
}
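// Illustrative sketch (not part of this PR), e.g. for a _test.go file in this
// package: schema and record generation for one metric sample. correctKey()
// sanitizes the key because Avro field names may not contain ':' or '.';
// ReplaceKey() restores the original name when checkpoints are read back.
// The metric name is hypothetical.
func exampleSchemaAndRecord() (string, map[string]any, string) {
data := map[string]schema.Float{"cpu_load:hwthread.0": 1.5}
avroSchema, _ := generateSchema(data) // field name becomes "cpu_load___hwthread__0"
record := generateRecord(data)        // record["cpu_load___hwthread__0"] == 1.5
original := ReplaceKey("cpu_load___hwthread__0") // "cpu_load:hwthread.0"
return avroSchema, record, original
}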

View File

@@ -0,0 +1,84 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package avro
import (
"context"
"log"
"slices"
"strconv"
"sync"
"github.com/ClusterCockpit/cc-backend/internal/config"
)
func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
// Stage line-protocol messages from the LineProtocolMessages channel into the in-memory Avro store.
go func() {
if config.MetricStoreKeys.Checkpoints.FileFormat == "json" {
wg.Done() // Mark this goroutine as done
return // Exit the goroutine
}
defer wg.Done()
var avroLevel *AvroLevel
oldSelector := make([]string, 0)
for {
select {
case <-ctx.Done():
return
case val := <-LineProtocolMessages:
// Fetch the frequency of the metric from the global configuration
freq, err := config.GetMetricFrequency(val.MetricName)
if err != nil {
log.Printf("Error fetching metric frequency: %s\n", err)
continue
}
metricName := ""
for _, selector_name := range val.Selector {
metricName += selector_name + Delimiter
}
metricName += val.MetricName
// Create a new selector for the Avro level
// The selector is a slice of strings that represents the path to the
// Avro level. It is created by appending the cluster, node, and metric
// name to the selector.
var selector []string
selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
if !testEq(oldSelector, selector) {
// Get the Avro level for the metric
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
// If the Avro level is nil, create a new one
if avroLevel == nil {
log.Printf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
}
oldSelector = slices.Clone(selector)
}
avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq))
}
}
}()
}
func testEq(a, b []string) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
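// Illustrative sketch (not part of this PR): what a producer sends into the
// staging pipeline (assuming "time" and the cc-lib schema package are imported).
// DataStaging groups these messages by cluster, node and metric frequency and
// buffers them until the next Avro checkpoint. Cluster, node and metric names
// are hypothetical.
func exampleProducer() {
LineProtocolMessages <- &AvroStruct{
MetricName: "cpu_load",
Cluster:    "fritz",
Node:       "f0720",
Selector:   []string{}, // optional sub-levels, e.g. "hwthread4"
Value:      schema.Float(0.5),
Timestamp:  time.Now().Unix(),
}
}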

167
internal/avro/avroStruct.go Normal file
View File

@@ -0,0 +1,167 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package avro
import (
"sync"
"github.com/ClusterCockpit/cc-lib/schema"
)
var (
LineProtocolMessages = make(chan *AvroStruct)
Delimiter = "ZZZZZ"
)
// CheckpointBufferMinutes is given in minutes. It controls how much data is
// buffered in memory before it is written to an Avro checkpoint.
var CheckpointBufferMinutes = 3
type AvroStruct struct {
MetricName string
Cluster string
Node string
Selector []string
Value schema.Float
Timestamp int64
}
type AvroStore struct {
root AvroLevel
}
var avroStore AvroStore
type AvroLevel struct {
children map[string]*AvroLevel
data map[int64]map[string]schema.Float
lock sync.RWMutex
}
type AvroField struct {
Name string `json:"name"`
Type any `json:"type"`
Default any `json:"default,omitempty"`
}
type AvroSchema struct {
Type string `json:"type"`
Name string `json:"name"`
Fields []AvroField `json:"fields"`
}
func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel {
if len(selector) == 0 {
return l
}
// Allow concurrent reads:
l.lock.RLock()
var child *AvroLevel
var ok bool
if l.children == nil {
// Children map needs to be created...
l.lock.RUnlock()
} else {
child, ok := l.children[selector[0]]
l.lock.RUnlock()
if ok {
return child.findAvroLevelOrCreate(selector[1:])
}
}
// The level does not exist, take write lock for unique access:
l.lock.Lock()
// While this thread waited for the write lock, another thread
// could have created the child node.
if l.children != nil {
child, ok = l.children[selector[0]]
if ok {
l.lock.Unlock()
return child.findAvroLevelOrCreate(selector[1:])
}
}
child = &AvroLevel{
data: make(map[int64]map[string]schema.Float, 0),
children: nil,
}
if l.children != nil {
l.children[selector[0]] = child
} else {
l.children = map[string]*AvroLevel{selector[0]: child}
}
l.lock.Unlock()
return child.findAvroLevelOrCreate(selector[1:])
}
func (l *AvroLevel) addMetric(metricName string, value schema.Float, timestamp int64, Freq int) {
l.lock.Lock()
defer l.lock.Unlock()
KeyCounter := int(CheckpointBufferMinutes * 60 / Freq)
// Create keys in advance for the given amount of time
if len(l.data) != KeyCounter {
if len(l.data) == 0 {
for i := range KeyCounter {
l.data[timestamp+int64(i*Freq)] = make(map[string]schema.Float, 0)
}
} else {
// Get the last timestamp
var lastTs int64
for ts := range l.data {
if ts > lastTs {
lastTs = ts
}
}
// Create keys for the next KeyCounter timestamps
l.data[lastTs+int64(Freq)] = make(map[string]schema.Float, 0)
}
}
closestTs := int64(0)
minDiff := int64(Freq) + 1 // Start with diff just outside the valid range
found := false
// Iterate over the pre-allocated timestamps and pick the closest one
// whose distance to the sample timestamp is within one frequency interval.
for ts, dat := range l.data {
// Check if timestamp is within range
diff := timestamp - ts
if diff < -int64(Freq) || diff > int64(Freq) {
continue
}
// Metric already present at this timestamp — skip
if _, ok := dat[metricName]; ok {
continue
}
// Check if this is the closest timestamp so far
if Abs(diff) < minDiff {
minDiff = Abs(diff)
closestTs = ts
found = true
}
}
if found {
l.data[closestTs][metricName] = value
}
}
func GetAvroStore() *AvroStore {
return &avroStore
}
// Abs returns the absolute value of x.
func Abs(x int64) int64 {
if x < 0 {
return -x
}
return x
}
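// Illustrative sketch (not part of this PR), e.g. for a _test.go file in this
// package: addressing the staging tree directly. A selector of
// [cluster, node, frequency] identifies one AvroLevel; addMetric then files the
// value into the pre-allocated timestamp slot closest to the sample timestamp
// (within one frequency interval). All values are hypothetical.
func exampleAvroLevel() {
sel := []string{"fritz", "f0720", "60"} // cluster, node, frequency in seconds
level := GetAvroStore().root.findAvroLevelOrCreate(sel)
level.addMetric("cpu_load", schema.Float(1.0), 1675956725, 60)
}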

View File

@@ -162,7 +162,7 @@ func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) {
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error()) cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
} }
if Clusters == nil || len(Clusters) < 1 { if len(Clusters) < 1 {
cclog.Abort("Config Init: At least one cluster required in config. Exited with error.") cclog.Abort("Config Init: At least one cluster required in config. Exited with error.")
} }
} }

View File

@@ -0,0 +1,128 @@
package config
import (
"bytes"
"encoding/json"
"fmt"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)
// --------------------
// Metric Store config
// --------------------
type MetricStoreConfig struct {
Checkpoints struct {
FileFormat string `json:"file-format"`
Interval string `json:"interval"`
RootDir string `json:"directory"`
Restore string `json:"restore"`
} `json:"checkpoints"`
Debug struct {
DumpToFile string `json:"dump-to-file"`
EnableGops bool `json:"gops"`
} `json:"debug"`
RetentionInMemory string `json:"retention-in-memory"`
Archive struct {
Interval string `json:"interval"`
RootDir string `json:"directory"`
DeleteInstead bool `json:"delete-instead"`
} `json:"archive"`
Nats []*NatsConfig `json:"nats"`
}
type NatsConfig struct {
// Address of the nats server
Address string `json:"address"`
// Username/Password, optional
Username string `json:"username"`
Password string `json:"password"`
// Path to a NATS credentials file, optional
Credsfilepath string `json:"creds-file-path"`
Subscriptions []struct {
// Channel name
SubscribeTo string `json:"subscribe-to"`
// Allow lines without a cluster tag, use this as default, optional
ClusterTag string `json:"cluster-tag"`
} `json:"subscriptions"`
}
var MetricStoreKeys MetricStoreConfig
// For aggregation over multiple values at different cpus/sockets/..., not time!
type AggregationStrategy int
const (
NoAggregation AggregationStrategy = iota
SumAggregation
AvgAggregation
)
func AssignAggregationStratergy(str string) (AggregationStrategy, error) {
switch str {
case "":
return NoAggregation, nil
case "sum":
return SumAggregation, nil
case "avg":
return AvgAggregation, nil
default:
return NoAggregation, fmt.Errorf("[METRICSTORE]> unknown aggregation strategy: %s", str)
}
}
type MetricConfig struct {
// Interval in seconds at which measurements will arrive.
Frequency int64
// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
Aggregation AggregationStrategy
// Private, used internally...
Offset int
}
var Metrics map[string]MetricConfig
func InitMetricStore(msConfig json.RawMessage) {
// Validate(msConfigSchema, msConfig)
dec := json.NewDecoder(bytes.NewReader(msConfig))
dec.DisallowUnknownFields()
if err := dec.Decode(&MetricStoreKeys); err != nil {
cclog.Abortf("[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\nError: %s\n", msConfig, err.Error())
}
}
func GetMetricFrequency(metricName string) (int64, error) {
if metric, ok := Metrics[metricName]; ok {
return metric.Frequency, nil
}
return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
}
// AddMetric registers a metric in the Metrics map. If a metric with the same
// name already exists, the entry with the higher frequency is kept.
func AddMetric(name string, metric MetricConfig) error {
if Metrics == nil {
Metrics = make(map[string]MetricConfig, 0)
}
if existingMetric, ok := Metrics[name]; ok {
if existingMetric.Frequency != metric.Frequency {
if existingMetric.Frequency < metric.Frequency {
existingMetric.Frequency = metric.Frequency
Metrics[name] = existingMetric
}
}
} else {
Metrics[name] = metric
}
return nil
}
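// Illustrative sketch (not part of this PR): a minimal metric-store
// configuration matching the JSON tags above, decoded via InitMetricStore,
// plus registering one metric frequency. All paths, durations and names are
// hypothetical.
func exampleMetricStoreConfig() (int64, error) {
raw := json.RawMessage(`{
"checkpoints": {"file-format": "avro", "interval": "1h", "directory": "./var/checkpoints", "restore": "48h"},
"archive": {"interval": "24h", "directory": "./var/archive"},
"retention-in-memory": "48h"
}`)
InitMetricStore(raw)
_ = AddMetric("cpu_load", MetricConfig{Frequency: 60, Aggregation: AvgAggregation})
return GetMetricFrequency("cpu_load") // 60, nil
}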

View File

@@ -144,7 +144,7 @@ var clustersSchema = `
"type": "string" "type": "string"
} }
}, },
"required": ["kind", "url"] "required": ["kind"]
}, },
"filterRanges": { "filterRanges": {
"description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.", "description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",

View File

@@ -118,7 +118,6 @@ type ComplexityRoot struct {
Duration func(childComplexity int) int Duration func(childComplexity int) int
Energy func(childComplexity int) int Energy func(childComplexity int) int
EnergyFootprint func(childComplexity int) int EnergyFootprint func(childComplexity int) int
Exclusive func(childComplexity int) int
Footprint func(childComplexity int) int Footprint func(childComplexity int) int
ID func(childComplexity int) int ID func(childComplexity int) int
JobID func(childComplexity int) int JobID func(childComplexity int) int
@@ -131,6 +130,7 @@ type ComplexityRoot struct {
Project func(childComplexity int) int Project func(childComplexity int) int
Resources func(childComplexity int) int Resources func(childComplexity int) int
SMT func(childComplexity int) int SMT func(childComplexity int) int
Shared func(childComplexity int) int
StartTime func(childComplexity int) int StartTime func(childComplexity int) int
State func(childComplexity int) int State func(childComplexity int) int
SubCluster func(childComplexity int) int SubCluster func(childComplexity int) int
@@ -427,8 +427,6 @@ type ClusterResolver interface {
type JobResolver interface { type JobResolver interface {
StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error) StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error)
Exclusive(ctx context.Context, obj *schema.Job) (int, error)
Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)
ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error)
@@ -729,13 +727,6 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.Job.EnergyFootprint(childComplexity), true return e.complexity.Job.EnergyFootprint(childComplexity), true
case "Job.exclusive":
if e.complexity.Job.Exclusive == nil {
break
}
return e.complexity.Job.Exclusive(childComplexity), true
case "Job.footprint": case "Job.footprint":
if e.complexity.Job.Footprint == nil { if e.complexity.Job.Footprint == nil {
break break
@@ -820,6 +811,13 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.Job.SMT(childComplexity), true return e.complexity.Job.SMT(childComplexity), true
case "Job.shared":
if e.complexity.Job.Shared == nil {
break
}
return e.complexity.Job.Shared(childComplexity), true
case "Job.startTime": case "Job.startTime":
if e.complexity.Job.StartTime == nil { if e.complexity.Job.StartTime == nil {
break break
@@ -2379,7 +2377,7 @@ type Job {
numAcc: Int! numAcc: Int!
energy: Float! energy: Float!
SMT: Int! SMT: Int!
exclusive: Int! shared: String!
partition: String! partition: String!
arrayJobId: Int! arrayJobId: Int!
monitoringStatus: Int! monitoringStatus: Int!
@@ -2766,7 +2764,7 @@ input JobFilter {
startTime: TimeRange startTime: TimeRange
state: [JobState!] state: [JobState!]
metricStats: [MetricStatItem!] metricStats: [MetricStatItem!]
exclusive: Int shared: String
node: StringInput node: StringInput
} }
@@ -5241,8 +5239,8 @@ func (ec *executionContext) fieldContext_Job_SMT(_ context.Context, field graphq
return fc, nil return fc, nil
} }
func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { func (ec *executionContext) _Job_shared(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_exclusive(ctx, field) fc, err := ec.fieldContext_Job_shared(ctx, field)
if err != nil { if err != nil {
return graphql.Null return graphql.Null
} }
@@ -5255,7 +5253,7 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co
}() }()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children ctx = rctx // use context from middleware stack in children
return ec.resolvers.Job().Exclusive(rctx, obj) return obj.Shared, nil
}) })
if err != nil { if err != nil {
ec.Error(ctx, err) ec.Error(ctx, err)
@@ -5267,19 +5265,19 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co
} }
return graphql.Null return graphql.Null
} }
res := resTmp.(int) res := resTmp.(string)
fc.Result = res fc.Result = res
return ec.marshalNInt2int(ctx, field.Selections, res) return ec.marshalNString2string(ctx, field.Selections, res)
} }
func (ec *executionContext) fieldContext_Job_exclusive(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { func (ec *executionContext) fieldContext_Job_shared(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{ fc = &graphql.FieldContext{
Object: "Job", Object: "Job",
Field: field, Field: field,
IsMethod: true, IsMethod: false,
IsResolver: true, IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Int does not have child fields") return nil, errors.New("field of type String does not have child fields")
}, },
} }
return fc, nil return fc, nil
@@ -6428,8 +6426,8 @@ func (ec *executionContext) fieldContext_JobResultList_items(_ context.Context,
return ec.fieldContext_Job_energy(ctx, field) return ec.fieldContext_Job_energy(ctx, field)
case "SMT": case "SMT":
return ec.fieldContext_Job_SMT(ctx, field) return ec.fieldContext_Job_SMT(ctx, field)
case "exclusive": case "shared":
return ec.fieldContext_Job_exclusive(ctx, field) return ec.fieldContext_Job_shared(ctx, field)
case "partition": case "partition":
return ec.fieldContext_Job_partition(ctx, field) return ec.fieldContext_Job_partition(ctx, field)
case "arrayJobId": case "arrayJobId":
@@ -11158,8 +11156,8 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
return ec.fieldContext_Job_energy(ctx, field) return ec.fieldContext_Job_energy(ctx, field)
case "SMT": case "SMT":
return ec.fieldContext_Job_SMT(ctx, field) return ec.fieldContext_Job_SMT(ctx, field)
case "exclusive": case "shared":
return ec.fieldContext_Job_exclusive(ctx, field) return ec.fieldContext_Job_shared(ctx, field)
case "partition": case "partition":
return ec.fieldContext_Job_partition(ctx, field) return ec.fieldContext_Job_partition(ctx, field)
case "arrayJobId": case "arrayJobId":
@@ -16475,7 +16473,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any
asMap[k] = v asMap[k] = v
} }
fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "exclusive", "node"} fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "shared", "node"}
for _, k := range fieldsInOrder { for _, k := range fieldsInOrder {
v, ok := asMap[k] v, ok := asMap[k]
if !ok { if !ok {
@@ -16608,13 +16606,13 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any
return it, err return it, err
} }
it.MetricStats = data it.MetricStats = data
case "exclusive": case "shared":
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("exclusive")) ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("shared"))
data, err := ec.unmarshalOInt2ᚖint(ctx, v) data, err := ec.unmarshalOString2ᚖstring(ctx, v)
if err != nil { if err != nil {
return it, err return it, err
} }
it.Exclusive = data it.Shared = data
case "node": case "node":
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("node")) ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("node"))
data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v) data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
@@ -17522,42 +17520,11 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
if out.Values[i] == graphql.Null { if out.Values[i] == graphql.Null {
atomic.AddUint32(&out.Invalids, 1) atomic.AddUint32(&out.Invalids, 1)
} }
case "exclusive": case "shared":
field := field out.Values[i] = ec._Job_shared(ctx, field, obj)
if out.Values[i] == graphql.Null {
innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { atomic.AddUint32(&out.Invalids, 1)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Job_exclusive(ctx, field, obj)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
return res
} }
if field.Deferrable != nil {
dfs, ok := deferred[field.Deferrable.Label]
di := 0
if ok {
dfs.AddField(field)
di = len(dfs.Values) - 1
} else {
dfs = graphql.NewFieldSet([]graphql.CollectedField{field})
deferred[field.Deferrable.Label] = dfs
}
dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler {
return innerFunc(ctx, dfs)
})
// don't run the out.Concurrently() call below
out.Values[i] = graphql.Null
continue
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "partition": case "partition":
out.Values[i] = ec._Job_partition(ctx, field, obj) out.Values[i] = ec._Job_partition(ctx, field, obj)
if out.Values[i] == graphql.Null { if out.Values[i] == graphql.Null {

View File

@@ -69,7 +69,7 @@ type JobFilter struct {
StartTime *config.TimeRange `json:"startTime,omitempty"` StartTime *config.TimeRange `json:"startTime,omitempty"`
State []schema.JobState `json:"state,omitempty"` State []schema.JobState `json:"state,omitempty"`
MetricStats []*MetricStatItem `json:"metricStats,omitempty"` MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
Exclusive *int `json:"exclusive,omitempty"` Shared *string `json:"shared,omitempty"`
Node *StringInput `json:"node,omitempty"` Node *StringInput `json:"node,omitempty"`
} }

View File

@@ -35,11 +35,6 @@ func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Tim
return &timestamp, nil return &timestamp, nil
} }
// Exclusive is the resolver for the exclusive field.
func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) {
panic(fmt.Errorf("not implemented: Exclusive - exclusive"))
}
// Tags is the resolver for the tags field. // Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) { func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID) return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID)
@@ -859,3 +854,15 @@ type mutationResolver struct{ *Resolver }
type nodeResolver struct{ *Resolver } type nodeResolver struct{ *Resolver }
type queryResolver struct{ *Resolver } type queryResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver } type subClusterResolver struct{ *Resolver }
// !!! WARNING !!!
// The code below was going to be deleted when updating resolvers. It has been copied here so you have
// one last chance to move it out of harms way if you want. There are two reasons this happens:
// - When renaming or deleting a resolver the old code will be put in here. You can safely delete
// it when you're done.
// - You have helper methods in this file. Move them out to keep these resolver files clean.
/*
func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) {
panic(fmt.Errorf("not implemented: Exclusive - exclusive"))
}
*/

View File

@@ -1 +1 @@
{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"exclusive":1,"monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n 
StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}} {"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"shared":"none","monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n 
WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}

View File

@@ -1 +1 @@
{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"exclusive":1,"jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}} {"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"shared":"none","jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}

419
internal/memorystore/api.go Normal file
View File

@@ -0,0 +1,419 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package memorystore
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"math"
"net/http"
"strconv"
"strings"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/util"
"github.com/influxdata/line-protocol/v2/lineprotocol"
)
// @title cc-metric-store REST API
// @version 1.0.0
// @description API for cc-metric-store
// @contact.name ClusterCockpit Project
// @contact.url https://clustercockpit.org
// @contact.email support@clustercockpit.org
// @license.name MIT License
// @license.url https://opensource.org/licenses/MIT
// @host localhost:8082
// @basePath /api/
// @securityDefinitions.apikey ApiKeyAuth
// @in header
// @name X-Auth-Token
// ErrorResponse model
type ErrorResponse struct {
// Status text of the HTTP status code
Status string `json:"status"`
Error string `json:"error"` // Error Message
}
type ApiMetricData struct {
Error *string `json:"error,omitempty"`
Data schema.FloatArray `json:"data,omitempty"`
From int64 `json:"from"`
To int64 `json:"to"`
Resolution int64 `json:"resolution"`
Avg schema.Float `json:"avg"`
Min schema.Float `json:"min"`
Max schema.Float `json:"max"`
}
func handleError(err error, statusCode int, rw http.ResponseWriter) {
// log.Warnf("REST ERROR : %s", err.Error())
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(statusCode)
json.NewEncoder(rw).Encode(ErrorResponse{
Status: http.StatusText(statusCode),
Error: err.Error(),
})
}
// TODO: Optimize this, just like the stats endpoint!
func (data *ApiMetricData) AddStats() {
n := 0
sum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64
for _, x := range data.Data {
if x.IsNaN() {
continue
}
n += 1
sum += float64(x)
min = math.Min(min, float64(x))
max = math.Max(max, float64(x))
}
if n > 0 {
avg := sum / float64(n)
data.Avg = schema.Float(avg)
data.Min = schema.Float(min)
data.Max = schema.Float(max)
} else {
data.Avg, data.Min, data.Max = schema.NaN, schema.NaN, schema.NaN
}
}
func (data *ApiMetricData) ScaleBy(f schema.Float) {
if f == 0 || f == 1 {
return
}
data.Avg *= f
data.Min *= f
data.Max *= f
for i := 0; i < len(data.Data); i++ {
data.Data[i] *= f
}
}
func (data *ApiMetricData) PadDataWithNull(ms *MemoryStore, from, to int64, metric string) {
minfo, ok := ms.Metrics[metric]
if !ok {
return
}
if (data.From / minfo.Frequency) > (from / minfo.Frequency) {
padfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency))
ndata := make([]schema.Float, 0, padfront+len(data.Data))
for i := 0; i < padfront; i++ {
ndata = append(ndata, schema.NaN)
}
for j := 0; j < len(data.Data); j++ {
ndata = append(ndata, data.Data[j])
}
data.Data = ndata
}
}
// handleFree godoc
// @summary
// @tags free
// @description This endpoint allows the users to free the Buffers from the
// metric store. This endpoint offers the users to remove then systematically
// and also allows then to prune the data under node, if they do not want to
// remove the whole node.
// @produce json
// @param to query string true "up to timestamp"
// @success 200 {string} string "ok"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /free/ [post]
func HandleFree(rw http.ResponseWriter, r *http.Request) {
rawTo := r.URL.Query().Get("to")
if rawTo == "" {
handleError(errors.New("'to' is a required query parameter"), http.StatusBadRequest, rw)
return
}
to, err := strconv.ParseInt(rawTo, 10, 64)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
}
// // TODO: lastCheckpoint might be modified by different go-routines.
// // Load it using the sync/atomic package?
// freeUpTo := lastCheckpoint.Unix()
// if to < freeUpTo {
// freeUpTo = to
// }
bodyDec := json.NewDecoder(r.Body)
var selectors [][]string
err = bodyDec.Decode(&selectors)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
return
}
ms := GetMemoryStore()
n := 0
for _, sel := range selectors {
bn, err := ms.Free(sel, to)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
}
n += bn
}
rw.WriteHeader(http.StatusOK)
fmt.Fprintf(rw, "buffers freed: %d\n", n)
}
// handleWrite godoc
// @summary Receive metrics in InfluxDB line-protocol
// @tags write
// @description Write data to the in-memory store in the InfluxDB line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)
// @accept plain
// @produce json
// @param cluster query string false "If the lines in the body do not have a cluster tag, use this value instead."
// @success 200 {string} string "ok"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /write/ [post]
func HandleWrite(rw http.ResponseWriter, r *http.Request) {
bytes, err := io.ReadAll(r.Body)
rw.Header().Add("Content-Type", "application/json")
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
}
ms := GetMemoryStore()
dec := lineprotocol.NewDecoderWithBytes(bytes)
if err := decodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil {
log.Printf("/api/write error: %s", err.Error())
handleError(err, http.StatusBadRequest, rw)
return
}
rw.WriteHeader(http.StatusOK)
}
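// Illustrative sketch (not part of this PR): pushing one measurement to the
// write endpoint from a client. The URL assumes the handler is mounted under
// the /api/ base path declared above; the measurement, tags and token are
// hypothetical, and the exact tag layout follows the line-protocol format
// linked in the description.
func exampleWriteClient() error {
body := strings.NewReader("cpu_load,hostname=f0720,type=node value=0.5\n")
req, err := http.NewRequest(http.MethodPost,
"http://localhost:8082/api/write/?cluster=fritz", body)
if err != nil {
return err
}
req.Header.Set("X-Auth-Token", "<jwt>")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return err
}
return resp.Body.Close()
}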
type ApiQueryRequest struct {
Cluster string `json:"cluster"`
Queries []ApiQuery `json:"queries"`
ForAllNodes []string `json:"for-all-nodes"`
From int64 `json:"from"`
To int64 `json:"to"`
WithStats bool `json:"with-stats"`
WithData bool `json:"with-data"`
WithPadding bool `json:"with-padding"`
}
type ApiQueryResponse struct {
Queries []ApiQuery `json:"queries,omitempty"`
Results [][]ApiMetricData `json:"results"`
}
type ApiQuery struct {
Type *string `json:"type,omitempty"`
SubType *string `json:"subtype,omitempty"`
Metric string `json:"metric"`
Hostname string `json:"host"`
Resolution int64 `json:"resolution"`
TypeIds []string `json:"type-ids,omitempty"`
SubTypeIds []string `json:"subtype-ids,omitempty"`
ScaleFactor schema.Float `json:"scale-by,omitempty"`
Aggregate bool `json:"aggreg"`
}
func FetchData(req ApiQueryRequest) (*ApiQueryResponse, error) {
req.WithData = true
ms := GetMemoryStore()
response := ApiQueryResponse{
Results: make([][]ApiMetricData, 0, len(req.Queries)),
}
if req.ForAllNodes != nil {
nodes := ms.ListChildren([]string{req.Cluster})
for _, node := range nodes {
for _, metric := range req.ForAllNodes {
q := ApiQuery{
Metric: metric,
Hostname: node,
}
req.Queries = append(req.Queries, q)
response.Queries = append(response.Queries, q)
}
}
}
for _, query := range req.Queries {
sels := make([]util.Selector, 0, 1)
if query.Aggregate || query.Type == nil {
sel := util.Selector{{String: req.Cluster}, {String: query.Hostname}}
if query.Type != nil {
if len(query.TypeIds) == 1 {
sel = append(sel, util.SelectorElement{String: *query.Type + query.TypeIds[0]})
} else {
ids := make([]string, len(query.TypeIds))
for i, id := range query.TypeIds {
ids[i] = *query.Type + id
}
sel = append(sel, util.SelectorElement{Group: ids})
}
if query.SubType != nil {
if len(query.SubTypeIds) == 1 {
sel = append(sel, util.SelectorElement{String: *query.SubType + query.SubTypeIds[0]})
} else {
ids := make([]string, len(query.SubTypeIds))
for i, id := range query.SubTypeIds {
ids[i] = *query.SubType + id
}
sel = append(sel, util.SelectorElement{Group: ids})
}
}
}
sels = append(sels, sel)
} else {
for _, typeId := range query.TypeIds {
if query.SubType != nil {
for _, subTypeId := range query.SubTypeIds {
sels = append(sels, util.Selector{
{String: req.Cluster},
{String: query.Hostname},
{String: *query.Type + typeId},
{String: *query.SubType + subTypeId},
})
}
} else {
sels = append(sels, util.Selector{
{String: req.Cluster},
{String: query.Hostname},
{String: *query.Type + typeId},
})
}
}
}
// log.Printf("query: %#v\n", query)
// log.Printf("sels: %#v\n", sels)
var err error
res := make([]ApiMetricData, 0, len(sels))
for _, sel := range sels {
data := ApiMetricData{}
data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)
if err != nil {
msg := err.Error()
data.Error = &msg
res = append(res, data)
continue
}
if req.WithStats {
data.AddStats()
}
if query.ScaleFactor != 0 {
data.ScaleBy(query.ScaleFactor)
}
if req.WithPadding {
data.PadDataWithNull(ms, req.From, req.To, query.Metric)
}
if !req.WithData {
data.Data = nil
}
res = append(res, data)
}
response.Results = append(response.Results, res)
}
return &response, nil
}
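// Illustrative sketch (not part of this PR): querying the memory store through
// FetchData. This requests one hour of node-level cpu_load for a single host
// at 60 s resolution and includes min/avg/max statistics; cluster, host and
// the time window are hypothetical.
func exampleFetch() (*ApiQueryResponse, error) {
return FetchData(ApiQueryRequest{
Cluster:   "fritz",
From:      1675956725,
To:        1675960325,
WithStats: true,
Queries: []ApiQuery{
{Metric: "cpu_load", Hostname: "f0720", Resolution: 60},
},
})
}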
// handleDebug godoc
// @summary Debug endpoint
// @tags debug
// @description This endpoint allows the users to print the content of
// nodes/clusters/metrics to review the state of the data.
// @produce json
// @param selector query string false "Selector"
// @success 200 {string} string "Debug dump"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /debug/ [post]
func HandleDebug(rw http.ResponseWriter, r *http.Request) {
raw := r.URL.Query().Get("selector")
rw.Header().Add("Content-Type", "application/json")
selector := []string{}
if len(raw) != 0 {
selector = strings.Split(raw, ":")
}
ms := GetMemoryStore()
if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil {
handleError(err, http.StatusBadRequest, rw)
return
}
}
// handleHealthCheck godoc
// @summary HealthCheck endpoint
// @tags healthcheck
// @description This endpoint allows the users to check if a node is healthy
// @produce json
// @param cluster query string true "Cluster name"
// @param node query string true "Node name"
// @success 200 {string} string "Healthcheck status"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /healthcheck/ [get]
func HandleHealthCheck(rw http.ResponseWriter, r *http.Request) {
rawCluster := r.URL.Query().Get("cluster")
rawNode := r.URL.Query().Get("node")
if rawCluster == "" || rawNode == "" {
handleError(errors.New("'cluster' and 'node' are required query parameters"), http.StatusBadRequest, rw)
return
}
rw.Header().Add("Content-Type", "application/json")
selector := []string{rawCluster, rawNode}
ms := GetMemoryStore()
if err := ms.HealthCheck(bufio.NewWriter(rw), selector); err != nil {
handleError(err, http.StatusBadRequest, rw)
return
}
}

View File

@@ -0,0 +1,192 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package memorystore
import (
"archive/zip"
"bufio"
"context"
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
"sync"
"sync/atomic"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)
func Archiving(wg *sync.WaitGroup, ctx context.Context) {
go func() {
defer wg.Done()
d, err := time.ParseDuration(config.MetricStoreKeys.Archive.Interval)
if err != nil {
log.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err)
}
if d <= 0 {
return
}
ticks := func() <-chan time.Time {
if d <= 0 {
return nil
}
return time.NewTicker(d).C
}()
for {
select {
case <-ctx.Done():
return
case <-ticks:
t := time.Now().Add(-d)
log.Printf("[METRICSTORE]> start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339))
n, err := ArchiveCheckpoints(config.MetricStoreKeys.Checkpoints.RootDir,
config.MetricStoreKeys.Archive.RootDir, t.Unix(), config.MetricStoreKeys.Archive.DeleteInstead)
if err != nil {
log.Printf("[METRICSTORE]> archiving failed: %s\n", err.Error())
} else {
log.Printf("[METRICSTORE]> done: %d files zipped and moved to archive\n", n)
}
}
}
}()
}
var ErrNoNewData error = errors.New("all data already archived")
// ZIP all checkpoint files older than `from` together and write them to the `archiveDir`,
// deleting them from the `checkpointsDir`.
func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteInstead bool) (int, error) {
entries1, err := os.ReadDir(checkpointsDir)
if err != nil {
return 0, err
}
type workItem struct {
cdir, adir string
cluster, host string
}
var wg sync.WaitGroup
n, errs := int32(0), int32(0)
work := make(chan workItem, NumWorkers)
wg.Add(NumWorkers)
for worker := 0; worker < NumWorkers; worker++ {
go func() {
defer wg.Done()
for workItem := range work {
m, err := archiveCheckpoints(workItem.cdir, workItem.adir, from, deleteInstead)
if err != nil {
cclog.Errorf("error while archiving %s/%s: %s", workItem.cluster, workItem.host, err.Error())
atomic.AddInt32(&errs, 1)
}
atomic.AddInt32(&n, int32(m))
}
}()
}
for _, de1 := range entries1 {
entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
if e != nil {
err = e
}
for _, de2 := range entries2 {
cdir := filepath.Join(checkpointsDir, de1.Name(), de2.Name())
adir := filepath.Join(archiveDir, de1.Name(), de2.Name())
work <- workItem{
adir: adir, cdir: cdir,
cluster: de1.Name(), host: de2.Name(),
}
}
}
close(work)
wg.Wait()
if err != nil {
return int(n), err
}
if errs > 0 {
return int(n), fmt.Errorf("%d errors happened while archiving (%d successes)", errs, n)
}
return int(n), nil
}
// Helper function for `ArchiveCheckpoints`.
func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead bool) (int, error) {
entries, err := os.ReadDir(dir)
if err != nil {
return 0, err
}
extension := config.MetricStoreKeys.Checkpoints.FileFormat
files, err := findFiles(entries, from, extension, false)
if err != nil {
return 0, err
}
if deleteInstead {
n := 0
for _, checkpoint := range files {
filename := filepath.Join(dir, checkpoint)
if err = os.Remove(filename); err != nil {
return n, err
}
n += 1
}
return n, nil
}
filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from))
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
if err != nil && os.IsNotExist(err) {
err = os.MkdirAll(archiveDir, 0o755)
if err == nil {
f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
}
}
if err != nil {
return 0, err
}
defer f.Close()
bw := bufio.NewWriter(f)
defer bw.Flush()
zw := zip.NewWriter(bw)
defer zw.Close()
n := 0
for _, checkpoint := range files {
filename := filepath.Join(dir, checkpoint)
r, err := os.Open(filename)
if err != nil {
return n, err
}
defer r.Close()
w, err := zw.Create(checkpoint)
if err != nil {
return n, err
}
if _, err = io.Copy(w, r); err != nil {
return n, err
}
if err = os.Remove(filename); err != nil {
return n, err
}
n += 1
}
return n, nil
}
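// Illustrative sketch (not part of this PR): archiving checkpoints by hand.
// Everything older than one week is zipped from the checkpoint directory into
// the archive directory; passing deleteInstead=true would remove the old files
// without archiving them. The directories are hypothetical.
func exampleArchive() {
cutoff := time.Now().AddDate(0, 0, -7).Unix()
n, err := ArchiveCheckpoints("./var/checkpoints", "./var/archive", cutoff, false)
if err != nil {
log.Printf("[METRICSTORE]> manual archiving failed: %s\n", err.Error())
return
}
log.Printf("[METRICSTORE]> %d checkpoint files archived\n", n)
}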

View File

@@ -0,0 +1,233 @@
package memorystore
import (
"errors"
"sync"
"github.com/ClusterCockpit/cc-lib/schema"
)
// Default buffer capacity.
// `buffer.data` will only ever grow up to its capacity and a new link
// in the buffer chain will be created if needed so that no copying
// of data or reallocation needs to happen on writes.
const (
BUFFER_CAP int = 512
)
// So that we can reuse allocations
var bufferPool sync.Pool = sync.Pool{
New: func() interface{} {
return &buffer{
data: make([]schema.Float, 0, BUFFER_CAP),
}
},
}
var (
ErrNoData error = errors.New("[METRICSTORE]> no data for this metric/level")
ErrDataDoesNotAlign error = errors.New("[METRICSTORE]> data from lower granularities does not align")
)
// Each metric on each level has its own buffer.
// This is where the actual values go.
// If `cap(data)` is reached, a new buffer is created and
// becomes the new head of a buffer list.
type buffer struct {
prev *buffer
next *buffer
data []schema.Float
frequency int64
start int64
archived bool
closed bool
}
func newBuffer(ts, freq int64) *buffer {
b := bufferPool.Get().(*buffer)
b.frequency = freq
b.start = ts - (freq / 2)
b.prev = nil
b.next = nil
b.archived = false
b.closed = false
b.data = b.data[:0]
return b
}
// If a new buffer was created, the new head is returned.
// Otherwise, the existing buffer is returned.
// Normally, only "newer" data should be written, but if the value would
// end up in the same buffer anyway it is allowed.
func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {
if ts < b.start {
return nil, errors.New("[METRICSTORE]> cannot write value to buffer from past")
}
// idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)
idx := int((ts - b.start) / b.frequency)
if idx >= cap(b.data) {
newbuf := newBuffer(ts, b.frequency)
newbuf.prev = b
b.next = newbuf
b.close()
b = newbuf
idx = 0
}
// Overwriting value or writing value from past
if idx < len(b.data) {
b.data[idx] = value
return b, nil
}
// Fill up unwritten slots with NaN
for i := len(b.data); i < idx; i++ {
b.data = append(b.data, schema.NaN)
}
b.data = append(b.data, value)
return b, nil
}
func (b *buffer) end() int64 {
return b.firstWrite() + int64(len(b.data))*b.frequency
}
func (b *buffer) firstWrite() int64 {
return b.start + (b.frequency / 2)
}
func (b *buffer) close() {}
/*
func (b *buffer) close() {
if b.closed {
return
}
b.closed = true
n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64
for _, x := range b.data {
if x.IsNaN() {
continue
}
n += 1
f := float64(x)
sum += f
min = math.Min(min, f)
max = math.Max(max, f)
}
b.statisticts.samples = n
if n > 0 {
b.statisticts.avg = Float(sum / float64(n))
b.statisticts.min = Float(min)
b.statisticts.max = Float(max)
} else {
b.statisticts.avg = NaN
b.statisticts.min = NaN
b.statisticts.max = NaN
}
}
*/
// func interpolate(idx int, data []Float) Float {
// if idx == 0 || idx+1 == len(data) {
// return NaN
// }
// return (data[idx-1] + data[idx+1]) / 2.0
// }
// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
// Linear interpolation between neighboring cells is currently disabled (see the commented-out code below).
// If values at the start or end are missing, instead of NaN values, the second and third
// return values contain the actual `from`/`to`.
// This function goes back the buffer chain if `from` is older than the current buffer's start.
// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
// If `data` is not long enough to hold all values, this function will panic!
func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {
if from < b.firstWrite() {
if b.prev != nil {
return b.prev.read(from, to, data)
}
from = b.firstWrite()
}
i := 0
t := from
for ; t < to; t += b.frequency {
idx := int((t - b.start) / b.frequency)
if idx >= cap(b.data) {
if b.next == nil {
break
}
b = b.next
idx = 0
}
if idx >= len(b.data) {
if b.next == nil || to <= b.next.start {
break
}
data[i] += schema.NaN
} else if t < b.start {
data[i] += schema.NaN
// } else if b.data[idx].IsNaN() {
// data[i] += interpolate(idx, b.data)
} else {
data[i] += b.data[idx]
}
i++
}
return data[:i], from, t, nil
}
// Returns true if this buffer needs to be freed.
func (b *buffer) free(t int64) (delme bool, n int) {
if b.prev != nil {
delme, m := b.prev.free(t)
n += m
if delme {
b.prev.next = nil
if cap(b.prev.data) == BUFFER_CAP {
bufferPool.Put(b.prev)
}
b.prev = nil
}
}
end := b.end()
if end < t {
return true, n + 1
}
return false, n
}
// Call `callback` on every buffer that contains data in the range from `from` to `to`.
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
if b == nil {
return nil
}
if err := b.prev.iterFromTo(from, to, callback); err != nil {
return err
}
if from <= b.end() && b.start <= to {
return callback(b)
}
return nil
}
func (b *buffer) count() int64 {
res := int64(len(b.data))
if b.prev != nil {
res += b.prev.count()
}
return res
}
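// Illustrative sketch (not part of this PR), e.g. for a _test.go file in this
// package: the buffer chain in isolation. Two values are written one frequency
// interval apart and read back; read() walks back through older chain links if
// needed and leaves NaN in slots that were never written. Timestamps and
// values are hypothetical.
func exampleBuffer() ([]schema.Float, error) {
b := newBuffer(1000, 60) // first sample at ~1000, one slot per 60 s
b, _ = b.write(1000, schema.Float(1.0))
b, _ = b.write(1060, schema.Float(2.0))
out := make([]schema.Float, 2)
data, _, _, err := b.read(1000, 1120, out) // data == [1.0, 2.0]
return data, err
}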

View File

@@ -0,0 +1,765 @@
package memorystore
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"log"
"os"
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/ClusterCockpit/cc-backend/internal/avro"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/linkedin/goavro/v2"
)
// Whenever changed, update MarshalJSON as well!
type CheckpointMetrics struct {
Data []schema.Float `json:"data"`
Frequency int64 `json:"frequency"`
Start int64 `json:"start"`
}
type CheckpointFile struct {
Metrics map[string]*CheckpointMetrics `json:"metrics"`
Children map[string]*CheckpointFile `json:"children"`
From int64 `json:"from"`
To int64 `json:"to"`
}
var lastCheckpoint time.Time
func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
lastCheckpoint = time.Now()
if config.MetricStoreKeys.Checkpoints.FileFormat == "json" {
ms := GetMemoryStore()
go func() {
defer wg.Done()
d, err := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Interval)
if err != nil {
log.Fatal(err)
}
if d <= 0 {
return
}
ticks := func() <-chan time.Time {
if d <= 0 {
return nil
}
return time.NewTicker(d).C
}()
for {
select {
case <-ctx.Done():
return
case <-ticks:
log.Printf("[METRICSTORE]> start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339))
now := time.Now()
n, err := ms.ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir,
lastCheckpoint.Unix(), now.Unix())
if err != nil {
log.Printf("[METRICSTORE]> checkpointing failed: %s\n", err.Error())
} else {
log.Printf("[METRICSTORE]> done: %d checkpoint files created\n", n)
lastCheckpoint = now
}
}
}
}()
} else {
go func() {
defer wg.Done()
d, _ := time.ParseDuration("1m")
select {
case <-ctx.Done():
return
case <-time.After(time.Duration(avro.CheckpointBufferMinutes) * time.Minute):
// This is the first tick; wait until data for the configured number of buffer minutes has been collected.
avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, false)
// log.Printf("Checkpointing %d avro files", count)
}
ticks := func() <-chan time.Time {
if d <= 0 {
return nil
}
return time.NewTicker(d).C
}()
for {
select {
case <-ctx.Done():
return
case <-ticks:
// Regular ticks of 1 minute to write data.
avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, false)
// log.Printf("Checkpointing %d avro files", count)
}
}
}()
}
}
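// A hypothetical configuration sketch for this checkpointing loop (the JSON key
// names are assumptions; only the Go fields FileFormat, Interval, RootDir and
// Restore are referenced here and in Init):
//
//	"checkpoints": {
//	  "file-format": "avro",
//	  "interval": "12h",
//	  "directory": "./var/checkpoints",
//	  "restore": "48h"
//	}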
// As `Float` implements a custom MarshalJSON() function,
// serializing an array of such types has more overhead
// than one would assume (because of extra allocations, interfaces and so on).
func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {
buf := make([]byte, 0, 128+len(cm.Data)*8)
buf = append(buf, `{"frequency":`...)
buf = strconv.AppendInt(buf, cm.Frequency, 10)
buf = append(buf, `,"start":`...)
buf = strconv.AppendInt(buf, cm.Start, 10)
buf = append(buf, `,"data":[`...)
for i, x := range cm.Data {
if i != 0 {
buf = append(buf, ',')
}
if x.IsNaN() {
buf = append(buf, `null`...)
} else {
buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)
}
}
buf = append(buf, `]}`...)
return buf, nil
}
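// Example output (a sketch with made-up values): a NaN sample is encoded as
// null, floats are written with one decimal place:
//
//	{"frequency":60,"start":1694337600,"data":[1.0,null,2.5]}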
// Metrics stored at the root and cluster levels (the first two levels of the tree) are not stored away!
// A new JSON file is created per host. I have no idea if this will scale.
// The good thing: Only one host at a time is locked, so this function can run
// in parallel to writes/reads.
func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
levels := make([]*Level, 0)
selectors := make([][]string, 0)
m.root.lock.RLock()
for sel1, l1 := range m.root.children {
l1.lock.RLock()
for sel2, l2 := range l1.children {
levels = append(levels, l2)
selectors = append(selectors, []string{sel1, sel2})
}
l1.lock.RUnlock()
}
m.root.lock.RUnlock()
type workItem struct {
level *Level
dir string
selector []string
}
n, errs := int32(0), int32(0)
var wg sync.WaitGroup
wg.Add(NumWorkers)
work := make(chan workItem, NumWorkers*2)
for worker := 0; worker < NumWorkers; worker++ {
go func() {
defer wg.Done()
for workItem := range work {
if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {
if err == ErrNoNewData {
continue
}
log.Printf("[METRICSTORE]> error while checkpointing %#v: %s", workItem.selector, err.Error())
atomic.AddInt32(&errs, 1)
} else {
atomic.AddInt32(&n, 1)
}
}
}()
}
for i := 0; i < len(levels); i++ {
dir := path.Join(dir, path.Join(selectors[i]...))
work <- workItem{
level: levels[i],
dir: dir,
selector: selectors[i],
}
}
close(work)
wg.Wait()
if errs > 0 {
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)", errs, n)
}
return int(n), nil
}
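// The resulting on-disk layout, derived from the selectors and the file naming
// in toCheckpoint below, is one file per host and checkpoint interval:
//
//	<RootDir>/<cluster>/<host>/<from-unix-timestamp>.json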
func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
l.lock.RLock()
defer l.lock.RUnlock()
retval := &CheckpointFile{
From: from,
To: to,
Metrics: make(map[string]*CheckpointMetrics),
Children: make(map[string]*CheckpointFile),
}
for metric, minfo := range m.Metrics {
b := l.metrics[minfo.Offset]
if b == nil {
continue
}
allArchived := true
b.iterFromTo(from, to, func(b *buffer) error {
if !b.archived {
allArchived = false
}
return nil
})
if allArchived {
continue
}
data := make([]schema.Float, (to-from)/b.frequency+1)
data, start, end, err := b.read(from, to, data)
if err != nil {
return nil, err
}
for i := int((end - start) / b.frequency); i < len(data); i++ {
data[i] = schema.NaN
}
retval.Metrics[metric] = &CheckpointMetrics{
Frequency: b.frequency,
Start: start,
Data: data,
}
}
for name, child := range l.children {
val, err := child.toCheckpointFile(from, to, m)
if err != nil {
return nil, err
}
if val != nil {
retval.Children[name] = val
}
}
if len(retval.Children) == 0 && len(retval.Metrics) == 0 {
return nil, nil
}
return retval, nil
}
func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
cf, err := l.toCheckpointFile(from, to, m)
if err != nil {
return err
}
if cf == nil {
return ErrNoNewData
}
filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
if err != nil && os.IsNotExist(err) {
err = os.MkdirAll(dir, 0o755)
if err == nil {
f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
}
}
if err != nil {
return err
}
defer f.Close()
bw := bufio.NewWriter(f)
if err = json.NewEncoder(bw).Encode(cf); err != nil {
return err
}
return bw.Flush()
}
func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) {
var wg sync.WaitGroup
work := make(chan [2]string, NumWorkers)
n, errs := int32(0), int32(0)
wg.Add(NumWorkers)
for worker := 0; worker < NumWorkers; worker++ {
go func() {
defer wg.Done()
for host := range work {
lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))
nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from, extension)
if err != nil {
log.Fatalf("[METRICSTORE]> error while loading checkpoints: %s", err.Error())
atomic.AddInt32(&errs, 1)
}
atomic.AddInt32(&n, int32(nn))
}
}()
}
i := 0
clustersDir, err := os.ReadDir(dir)
for _, clusterDir := range clustersDir {
if !clusterDir.IsDir() {
err = errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
goto done
}
hostsDir, e := os.ReadDir(filepath.Join(dir, clusterDir.Name()))
if e != nil {
err = e
goto done
}
for _, hostDir := range hostsDir {
if !hostDir.IsDir() {
err = errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
goto done
}
i++
if i%NumWorkers == 0 && i > 100 {
// Forcing garbage collection runs here regularly during the loading of checkpoints
// will decrease the total heap size once everything has been loaded back into memory.
// While loading data, the heap grows fast, so the GC target size will almost
// always double. By forcing GCs here, we keep it growing more slowly so that,
// at the end, less memory is wasted.
runtime.GC()
}
work <- [2]string{clusterDir.Name(), hostDir.Name()}
}
}
done:
close(work)
wg.Wait()
if err != nil {
return int(n), err
}
if errs > 0 {
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while loading checkpoints (%d successes)", errs, n)
}
return int(n), nil
}
// Metrics stored at the root and cluster levels (the first two levels of the tree) are not loaded!
// This function can only be called once and before the very first write or read.
// Different host's data is loaded to memory in parallel.
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
if _, err := os.Stat(dir); os.IsNotExist(err) {
// The directory does not exist, so create it using os.MkdirAll()
err := os.MkdirAll(dir, 0755) // 0755 sets the permissions for the directory
if err != nil {
log.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
}
log.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir)
}
// Determine the checkpoint file format from the configuration.
fileFormat := config.MetricStoreKeys.Checkpoints.FileFormat
if fileFormat == "" {
fileFormat = "avro"
}
// Map to easily get the fallback format
oppositeFormat := map[string]string{
"json": "avro",
"avro": "json",
}
// First, attempt to load the specified format
if found, err := checkFilesWithExtension(dir, fileFormat); err != nil {
return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
} else if found {
log.Printf("[METRICSTORE]> Loading %s files because fileformat is %s\n", fileFormat, fileFormat)
return m.FromCheckpoint(dir, from, fileFormat)
}
// If not found, attempt the opposite format
altFormat := oppositeFormat[fileFormat]
if found, err := checkFilesWithExtension(dir, altFormat); err != nil {
return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
} else if found {
log.Printf("[METRICSTORE]> Loading %s files but fileformat is %s\n", altFormat, fileFormat)
return m.FromCheckpoint(dir, from, altFormat)
}
log.Println("[METRICSTORE]> No valid checkpoint files found in the directory.")
return 0, nil
}
func checkFilesWithExtension(dir string, extension string) (bool, error) {
found := false
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("[METRICSTORE]> error accessing path %s: %v", path, err)
}
if !info.IsDir() && filepath.Ext(info.Name()) == "."+extension {
found = true
return nil
}
return nil
})
if err != nil {
return false, fmt.Errorf("[METRICSTORE]> error walking through directories: %s", err)
}
return found, nil
}
func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
br := bufio.NewReader(f)
fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:]
resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while reading avro file (resolution parsing) : %s", err)
}
from_timestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error converting timestamp from the avro file : %s", err)
}
// Shift by half a resolution step, mirroring the line-protocol ingest logic.
from_timestamp -= (resolution / 2)
// fmt.Printf("File : %s with resolution : %d\n", fileName, resolution)
var recordCounter int64 = 0
// Create a new OCF reader from the buffered reader
ocfReader, err := goavro.NewOCFReader(br)
if err != nil {
panic(err)
}
metricsData := make(map[string]schema.FloatArray)
for ocfReader.Scan() {
datum, err := ocfReader.Read()
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while reading avro file : %s", err)
}
record, ok := datum.(map[string]interface{})
if !ok {
panic("[METRICSTORE]> failed to assert datum as map[string]interface{}")
}
for key, value := range record {
metricsData[key] = append(metricsData[key], schema.ConvertToFloat(value.(float64)))
}
recordCounter += 1
}
to := (from_timestamp + (recordCounter / (60 / resolution) * 60))
if to < from {
return nil
}
for key, floatArray := range metricsData {
metricName := avro.ReplaceKey(key)
if strings.Contains(metricName, avro.Delimiter) {
subString := strings.Split(metricName, avro.Delimiter)
lvl := l
for i := 0; i < len(subString)-1; i++ {
sel := subString[i]
if lvl.children == nil {
lvl.children = make(map[string]*Level)
}
child, ok := lvl.children[sel]
if !ok {
child = &Level{
metrics: make([]*buffer, len(m.Metrics)),
children: nil,
}
lvl.children[sel] = child
}
lvl = child
}
leafMetricName := subString[len(subString)-1]
err = lvl.createBuffer(m, leafMetricName, floatArray, from_timestamp, resolution)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
}
} else {
err = l.createBuffer(m, metricName, floatArray, from_timestamp, resolution)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
}
}
}
return nil
}
func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schema.FloatArray, from int64, resolution int64) error {
n := len(floatArray)
b := &buffer{
frequency: resolution,
start: from,
data: floatArray[0:n:n],
prev: nil,
next: nil,
archived: true,
}
b.close()
minfo, ok := m.Metrics[metricName]
if !ok {
return nil
// return errors.New("Unkown metric: " + name)
}
prev := l.metrics[minfo.Offset]
if prev == nil {
l.metrics[minfo.Offset] = b
} else {
if prev.start > b.start {
return errors.New("wooops")
}
b.prev = prev
prev.next = b
missingCount := ((int(b.start) - int(prev.start)) - len(prev.data)*int(b.frequency))
if missingCount > 0 {
missingCount /= int(b.frequency)
for range missingCount {
prev.data = append(prev.data, schema.NaN)
}
prev.data = prev.data[0:len(prev.data):len(prev.data)]
}
}
l.metrics[minfo.Offset] = b
return nil
}
func (l *Level) loadJsonFile(m *MemoryStore, f *os.File, from int64) error {
br := bufio.NewReader(f)
cf := &CheckpointFile{}
if err := json.NewDecoder(br).Decode(cf); err != nil {
return err
}
if cf.To != 0 && cf.To < from {
return nil
}
if err := l.loadFile(cf, m); err != nil {
return err
}
return nil
}
func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
for name, metric := range cf.Metrics {
n := len(metric.Data)
b := &buffer{
frequency: metric.Frequency,
start: metric.Start,
data: metric.Data[0:n:n], // Space is wasted here :(
prev: nil,
next: nil,
archived: true,
}
b.close()
minfo, ok := m.Metrics[name]
if !ok {
continue
// return errors.New("Unkown metric: " + name)
}
prev := l.metrics[minfo.Offset]
if prev == nil {
l.metrics[minfo.Offset] = b
} else {
if prev.start > b.start {
return errors.New("wooops")
}
b.prev = prev
prev.next = b
}
l.metrics[minfo.Offset] = b
}
if len(cf.Children) > 0 && l.children == nil {
l.children = make(map[string]*Level)
}
for sel, childCf := range cf.Children {
child, ok := l.children[sel]
if !ok {
child = &Level{
metrics: make([]*buffer, len(m.Metrics)),
children: nil,
}
l.children[sel] = child
}
if err := child.loadFile(childCf, m); err != nil {
return err
}
}
return nil
}
func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension string) (int, error) {
direntries, err := os.ReadDir(dir)
if err != nil {
if os.IsNotExist(err) {
return 0, nil
}
return 0, err
}
allFiles := make([]fs.DirEntry, 0)
filesLoaded := 0
for _, e := range direntries {
if e.IsDir() {
child := &Level{
metrics: make([]*buffer, len(m.Metrics)),
children: make(map[string]*Level),
}
files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from, extension)
filesLoaded += files
if err != nil {
return filesLoaded, err
}
l.children[e.Name()] = child
} else if strings.HasSuffix(e.Name(), "."+extension) {
allFiles = append(allFiles, e)
} else {
continue
}
}
files, err := findFiles(allFiles, from, extension, true)
if err != nil {
return filesLoaded, err
}
loaders := map[string]func(*MemoryStore, *os.File, int64) error{
"json": l.loadJsonFile,
"avro": l.loadAvroFile,
}
loader := loaders[extension]
for _, filename := range files {
f, err := os.Open(path.Join(dir, filename))
if err != nil {
return filesLoaded, err
}
defer f.Close()
if err = loader(m, f, from); err != nil {
return filesLoaded, err
}
filesLoaded += 1
}
return filesLoaded, nil
}
// This will probably get very slow over time!
// A solution could be some sort of index file in which all other files
// and the timespans they cover are listed.
func findFiles(direntries []fs.DirEntry, t int64, extension string, findMoreRecentFiles bool) ([]string, error) {
nums := map[string]int64{}
for _, e := range direntries {
if !strings.HasSuffix(e.Name(), "."+extension) {
continue
}
ts, err := strconv.ParseInt(e.Name()[strings.Index(e.Name(), "_")+1:len(e.Name())-5], 10, 64)
if err != nil {
return nil, err
}
nums[e.Name()] = ts
}
sort.Slice(direntries, func(i, j int) bool {
a, b := direntries[i], direntries[j]
return nums[a.Name()] < nums[b.Name()]
})
filenames := make([]string, 0)
for i := 0; i < len(direntries); i++ {
e := direntries[i]
ts1 := nums[e.Name()]
if findMoreRecentFiles && t <= ts1 {
filenames = append(filenames, e.Name())
}
if i == len(direntries)-1 {
continue
}
enext := direntries[i+1]
ts2 := nums[enext.Name()]
if findMoreRecentFiles {
if ts1 < t && t < ts2 {
filenames = append(filenames, e.Name())
}
} else {
if ts2 < t {
filenames = append(filenames, e.Name())
}
}
}
return filenames, nil
}

View File

@@ -0,0 +1,107 @@
package memorystore
import (
"bufio"
"fmt"
"strconv"
)
func (b *buffer) debugDump(buf []byte) []byte {
if b.prev != nil {
buf = b.prev.debugDump(buf)
}
start, len, end := b.start, len(b.data), b.start+b.frequency*int64(len(b.data))
buf = append(buf, `{"start":`...)
buf = strconv.AppendInt(buf, start, 10)
buf = append(buf, `,"len":`...)
buf = strconv.AppendInt(buf, int64(len), 10)
buf = append(buf, `,"end":`...)
buf = strconv.AppendInt(buf, end, 10)
if b.archived {
buf = append(buf, `,"saved":true`...)
}
if b.next != nil {
buf = append(buf, `},`...)
} else {
buf = append(buf, `}`...)
}
return buf
}
func (l *Level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf []byte, depth int) ([]byte, error) {
l.lock.RLock()
defer l.lock.RUnlock()
for i := 0; i < depth; i++ {
buf = append(buf, '\t')
}
buf = append(buf, '"')
buf = append(buf, lvlname...)
buf = append(buf, "\":{\n"...)
depth += 1
objitems := 0
for name, mc := range m.Metrics {
if b := l.metrics[mc.Offset]; b != nil {
for i := 0; i < depth; i++ {
buf = append(buf, '\t')
}
buf = append(buf, '"')
buf = append(buf, name...)
buf = append(buf, `":[`...)
buf = b.debugDump(buf)
buf = append(buf, "],\n"...)
objitems++
}
}
for name, lvl := range l.children {
_, err := w.Write(buf)
if err != nil {
return nil, err
}
buf = buf[0:0]
buf, err = lvl.debugDump(m, w, name, buf, depth)
if err != nil {
return nil, err
}
buf = append(buf, ',', '\n')
objitems++
}
// remove final `,`:
if objitems > 0 {
buf = append(buf[0:len(buf)-1], '\n')
}
depth -= 1
for i := 0; i < depth; i++ {
buf = append(buf, '\t')
}
buf = append(buf, '}')
return buf, nil
}
func (m *MemoryStore) DebugDump(w *bufio.Writer, selector []string) error {
lvl := m.root.findLevel(selector)
if lvl == nil {
return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
}
buf := make([]byte, 0, 2048)
buf = append(buf, "{"...)
buf, err := lvl.debugDump(m, w, "data", buf, 0)
if err != nil {
return err
}
buf = append(buf, "}\n"...)
if _, err = w.Write(buf); err != nil {
return err
}
return w.Flush()
}

View File

@@ -0,0 +1,88 @@
package memorystore
import (
"bufio"
"fmt"
"time"
)
// This is a threshold that allows a node to be considered healthy even when a certain
// number of data points are missing. If a node misses up to the last 5 data points of a
// metric, the healthCheck endpoint will still report the node as healthy; more than 5
// missing points deem that metric unhealthy.
const MaxMissingDataPoints int64 = 5
// This is a threshold that allows up to a certain number of metrics on a node to be
// unhealthy. It works together with MaxMissingDataPoints: every metric (including
// submetrics) that misses its last MaxMissingDataPoints data points counts as unhealthy.
// As long as fewer than MaxUnhealthyMetrics metrics are unhealthy, the node is reported
// healthy; at or above this threshold it is reported unhealthy.
const MaxUnhealthyMetrics int64 = 5
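// Worked example (derived from the checks below): with a metric frequency of 60s,
// a buffer whose newest sample is older than 5*60s = 300s counts as one unhealthy
// metric. A node with up to 4 such metrics is still reported "Healthy"; with 5 or
// more it is reported "Unhealthy".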
func (b *buffer) healthCheck() int64 {
// Check if the buffer is empty
if b.data == nil {
return 1
}
buffer_end := b.start + b.frequency*int64(len(b.data))
t := time.Now().Unix()
// Check if the buffer is too old
if t-buffer_end > MaxMissingDataPoints*b.frequency {
return 1
}
return 0
}
func (l *Level) healthCheck(m *MemoryStore, count int64) (int64, error) {
l.lock.RLock()
defer l.lock.RUnlock()
for _, mc := range m.Metrics {
if b := l.metrics[mc.Offset]; b != nil {
count += b.healthCheck()
}
}
for _, lvl := range l.children {
c, err := lvl.healthCheck(m, 0)
if err != nil {
return 0, err
}
count += c
}
return count, nil
}
func (m *MemoryStore) HealthCheck(w *bufio.Writer, selector []string) error {
lvl := m.root.findLevel(selector)
if lvl == nil {
return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
}
buf := make([]byte, 0, 25)
// buf = append(buf, "{"...)
var count int64 = 0
unhealthyMetricsCount, err := lvl.healthCheck(m, count)
if err != nil {
return err
}
if unhealthyMetricsCount < MaxUnhealthyMetrics {
buf = append(buf, "Healthy"...)
} else {
buf = append(buf, "Unhealthy"...)
}
// buf = append(buf, "}\n"...)
if _, err = w.Write(buf); err != nil {
return err
}
return w.Flush()
}

View File

@@ -0,0 +1,187 @@
package memorystore
import (
"sync"
"unsafe"
"github.com/ClusterCockpit/cc-lib/util"
)
// Could also be called "node" as this forms a node in a tree structure.
// Called Level because "node" might be confusing here.
// Can be either a leaf or an inner node. In this tree structure, inner nodes can
// also hold data (in `metrics`).
type Level struct {
children map[string]*Level
metrics []*buffer
lock sync.RWMutex
}
// Find the correct level for the given selector, creating it if
// it does not exist. Example selector in the context of the
// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }.
// This function would probably benefit a lot from `level.children` being a `sync.Map`.
func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {
if len(selector) == 0 {
return l
}
// Allow concurrent reads:
l.lock.RLock()
var child *Level
var ok bool
if l.children == nil {
// Children map needs to be created...
l.lock.RUnlock()
} else {
child, ok := l.children[selector[0]]
l.lock.RUnlock()
if ok {
return child.findLevelOrCreate(selector[1:], nMetrics)
}
}
// The level does not exist, take the write lock for unique access:
l.lock.Lock()
// While this thread waited for the write lock, another thread
// could have created the child node.
if l.children != nil {
child, ok = l.children[selector[0]]
if ok {
l.lock.Unlock()
return child.findLevelOrCreate(selector[1:], nMetrics)
}
}
child = &Level{
metrics: make([]*buffer, nMetrics),
children: nil,
}
if l.children != nil {
l.children[selector[0]] = child
} else {
l.children = map[string]*Level{selector[0]: child}
}
l.lock.Unlock()
return child.findLevelOrCreate(selector[1:], nMetrics)
}
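// Usage sketch (hypothetical names; a selector walks cluster -> host -> optional
// type/type-id, matching the selectors built in decodeLine):
//
//	hostLvl := ms.root.findLevelOrCreate([]string{"emmy", "host123"}, len(ms.Metrics))
//	cpuLvl := hostLvl.findLevelOrCreate([]string{"cpu0"}, len(ms.Metrics))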
func (l *Level) free(t int64) (int, error) {
l.lock.Lock()
defer l.lock.Unlock()
n := 0
for i, b := range l.metrics {
if b != nil {
delme, m := b.free(t)
n += m
if delme {
if cap(b.data) == BUFFER_CAP {
bufferPool.Put(b)
}
l.metrics[i] = nil
}
}
}
for _, l := range l.children {
m, err := l.free(t)
n += m
if err != nil {
return n, err
}
}
return n, nil
}
func (l *Level) sizeInBytes() int64 {
l.lock.RLock()
defer l.lock.RUnlock()
size := int64(0)
for _, b := range l.metrics {
if b != nil {
size += b.count() * int64(unsafe.Sizeof(util.Float(0)))
}
}
for _, child := range l.children {
size += child.sizeInBytes()
}
return size
}
func (l *Level) findLevel(selector []string) *Level {
if len(selector) == 0 {
return l
}
l.lock.RLock()
defer l.lock.RUnlock()
lvl := l.children[selector[0]]
if lvl == nil {
return nil
}
return lvl.findLevel(selector[1:])
}
func (l *Level) findBuffers(selector util.Selector, offset int, f func(b *buffer) error) error {
l.lock.RLock()
defer l.lock.RUnlock()
if len(selector) == 0 {
b := l.metrics[offset]
if b != nil {
return f(b)
}
for _, lvl := range l.children {
err := lvl.findBuffers(nil, offset, f)
if err != nil {
return err
}
}
return nil
}
sel := selector[0]
if len(sel.String) != 0 && l.children != nil {
lvl, ok := l.children[sel.String]
if ok {
err := lvl.findBuffers(selector[1:], offset, f)
if err != nil {
return err
}
}
return nil
}
if sel.Group != nil && l.children != nil {
for _, key := range sel.Group {
lvl, ok := l.children[key]
if ok {
err := lvl.findBuffers(selector[1:], offset, f)
if err != nil {
return err
}
}
}
return nil
}
if sel.Any && l.children != nil {
for _, lvl := range l.children {
if err := lvl.findBuffers(selector[1:], offset, f); err != nil {
return err
}
}
return nil
}
return nil
}

View File

@@ -0,0 +1,347 @@
package memorystore
import (
"context"
"fmt"
"log"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/avro"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/influxdata/line-protocol/v2/lineprotocol"
"github.com/nats-io/nats.go"
)
// Each connection is handled in its own goroutine. This is a blocking function.
// func ReceiveRaw(ctx context.Context,
// listener net.Listener,
// handleLine func(*lineprotocol.Decoder, string) error,
// ) error {
// var wg sync.WaitGroup
// wg.Add(1)
// go func() {
// defer wg.Done()
// <-ctx.Done()
// if err := listener.Close(); err != nil {
// log.Printf("listener.Close(): %s", err.Error())
// }
// }()
// for {
// conn, err := listener.Accept()
// if err != nil {
// if errors.Is(err, net.ErrClosed) {
// break
// }
// log.Printf("listener.Accept(): %s", err.Error())
// }
// wg.Add(2)
// go func() {
// defer wg.Done()
// defer conn.Close()
// dec := lineprotocol.NewDecoder(conn)
// connctx, cancel := context.WithCancel(context.Background())
// defer cancel()
// go func() {
// defer wg.Done()
// select {
// case <-connctx.Done():
// conn.Close()
// case <-ctx.Done():
// conn.Close()
// }
// }()
// if err := handleLine(dec, "default"); err != nil {
// if errors.Is(err, net.ErrClosed) {
// return
// }
// log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
// errmsg := make([]byte, 128)
// errmsg = append(errmsg, `error: `...)
// errmsg = append(errmsg, err.Error()...)
// errmsg = append(errmsg, '\n')
// conn.Write(errmsg)
// }
// }()
// }
// wg.Wait()
// return nil
// }
// Connect to a NATS server and subscribe to the configured subjects. This is a
// blocking function. Each received line is decoded and written to the MemoryStore.
// Cancel the passed context for graceful termination.
func ReceiveNats(conf *(config.NatsConfig),
ms *MemoryStore,
workers int,
ctx context.Context,
) error {
var opts []nats.Option
if conf.Username != "" && conf.Password != "" {
opts = append(opts, nats.UserInfo(conf.Username, conf.Password))
}
if conf.Credsfilepath != "" {
opts = append(opts, nats.UserCredentials(conf.Credsfilepath))
}
nc, err := nats.Connect(conf.Address, opts...)
if err != nil {
return err
}
defer nc.Close()
var wg sync.WaitGroup
var subs []*nats.Subscription
msgs := make(chan *nats.Msg, workers*2)
for _, sc := range conf.Subscriptions {
clusterTag := sc.ClusterTag
var sub *nats.Subscription
if workers > 1 {
wg.Add(workers)
for range workers {
go func() {
for m := range msgs {
dec := lineprotocol.NewDecoderWithBytes(m.Data)
if err := decodeLine(dec, ms, clusterTag); err != nil {
log.Printf("error: %s\n", err.Error())
}
}
wg.Done()
}()
}
sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
msgs <- m
})
} else {
sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
dec := lineprotocol.NewDecoderWithBytes(m.Data)
if err := decodeLine(dec, ms, clusterTag); err != nil {
log.Printf("error: %s\n", err.Error())
}
})
}
if err != nil {
return err
}
log.Printf("NATS subscription to '%s' on '%s' established\n", sc.SubscribeTo, conf.Address)
subs = append(subs, sub)
}
<-ctx.Done()
for _, sub := range subs {
err = sub.Unsubscribe()
if err != nil {
log.Printf("NATS unsubscribe failed: %s", err.Error())
}
}
close(msgs)
wg.Wait()
nc.Close()
log.Println("NATS connection closed")
return nil
}
// Place `prefix` in front of `buf` but if possible,
// do that in place in `buf`.
func reorder(buf, prefix []byte) []byte {
n := len(prefix)
m := len(buf)
if cap(buf) < m+n {
return append(prefix[:n:n], buf...)
} else {
buf = buf[:n+m]
for i := m - 1; i >= 0; i-- {
buf[i+n] = buf[i]
}
for i := 0; i < n; i++ {
buf[i] = prefix[i]
}
return buf
}
}
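// Example (a sketch of the intended behaviour): with buf = []byte("17") and
// prefix = []byte("cpu"), reorder returns []byte("cpu17"), reusing buf's backing
// array when its capacity is large enough.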
// Decode lines using dec and make write calls to the MemoryStore.
// If a line is missing its cluster tag, use clusterDefault as default.
func decodeLine(dec *lineprotocol.Decoder,
ms *MemoryStore,
clusterDefault string,
) error {
// Reduce allocations in loop:
t := time.Now()
metric, metricBuf := Metric{}, make([]byte, 0, 16)
selector := make([]string, 0, 4)
typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)
// Optimize for the case where all lines in a "batch" are about the same
// cluster and host. By using `WriteToLevel` (level = host), we do not need
// to take the root- and cluster-level lock as often.
var lvl *Level = nil
prevCluster, prevHost := "", ""
var ok bool
for dec.Next() {
rawmeasurement, err := dec.Measurement()
if err != nil {
return err
}
// Needs to be copied because another call to dec.* would
// invalidate the returned slice.
metricBuf = append(metricBuf[:0], rawmeasurement...)
// The go compiler optimizes map[string(byteslice)] lookups:
metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]
if !ok {
continue
}
typeBuf, subTypeBuf := typeBuf[:0], subTypeBuf[:0]
cluster, host := clusterDefault, ""
for {
key, val, err := dec.NextTag()
if err != nil {
return err
}
if key == nil {
break
}
// The go compiler optimizes string([]byte{...}) == "...":
switch string(key) {
case "cluster":
if string(val) == prevCluster {
cluster = prevCluster
} else {
cluster = string(val)
lvl = nil
}
case "hostname", "host":
if string(val) == prevHost {
host = prevHost
} else {
host = string(val)
lvl = nil
}
case "type":
if string(val) == "node" {
break
}
// We cannot be sure that the "type" tag comes before the "type-id" tag:
if len(typeBuf) == 0 {
typeBuf = append(typeBuf, val...)
} else {
typeBuf = reorder(typeBuf, val)
}
case "type-id":
typeBuf = append(typeBuf, val...)
case "subtype":
// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
if len(subTypeBuf) == 0 {
subTypeBuf = append(subTypeBuf, val...)
} else {
subTypeBuf = reorder(subTypeBuf, val)
// subTypeBuf = reorder(typeBuf, val)
}
case "stype-id":
subTypeBuf = append(subTypeBuf, val...)
default:
// Ignore unknown tags (cc-metric-collector might, for example, send us a unit that we do not need)
// return fmt.Errorf("unknown tag: '%s' (value: '%s')", string(key), string(val))
}
}
// If the cluster or host changed, the lvl was set to nil
if lvl == nil {
selector = selector[:2]
selector[0], selector[1] = cluster, host
lvl = ms.GetLevel(selector)
prevCluster, prevHost = cluster, host
}
// subtypes:
selector = selector[:0]
if len(typeBuf) > 0 {
selector = append(selector, string(typeBuf)) // <- Allocation :(
if len(subTypeBuf) > 0 {
selector = append(selector, string(subTypeBuf))
}
}
for {
key, val, err := dec.NextField()
if err != nil {
return err
}
if key == nil {
break
}
if string(key) != "value" {
return fmt.Errorf("host %s: unknown field: '%s' (value: %#v)", host, string(key), val)
}
if val.Kind() == lineprotocol.Float {
metric.Value = schema.Float(val.FloatV())
} else if val.Kind() == lineprotocol.Int {
metric.Value = schema.Float(val.IntV())
} else if val.Kind() == lineprotocol.Uint {
metric.Value = schema.Float(val.UintV())
} else {
return fmt.Errorf("host %s: unsupported value type in message: %s", host, val.Kind().String())
}
}
if t, err = dec.Time(lineprotocol.Second, t); err != nil {
t = time.Now()
if t, err = dec.Time(lineprotocol.Millisecond, t); err != nil {
t = time.Now()
if t, err = dec.Time(lineprotocol.Microsecond, t); err != nil {
t = time.Now()
if t, err = dec.Time(lineprotocol.Nanosecond, t); err != nil {
return fmt.Errorf("host %s: timestamp : %#v with error : %#v", host, t, err.Error())
}
}
}
}
if err != nil {
return fmt.Errorf("host %s: timestamp : %#v with error : %#v", host, t, err.Error())
}
time := t.Unix()
if config.MetricStoreKeys.Checkpoints.FileFormat != "json" {
avro.LineProtocolMessages <- &avro.AvroStruct{
MetricName: string(metricBuf),
Cluster: cluster,
Node: host,
Selector: append([]string{}, selector...),
Value: metric.Value,
Timestamp: time}
}
if err := ms.WriteToLevel(lvl, selector, time, []Metric{metric}); err != nil {
return err
}
}
return nil
}
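// Example of a line this decoder accepts (a sketch with made-up values; the tag
// names follow the switch above and "cpu_load" must be a configured metric):
//
//	cpu_load,cluster=emmy,hostname=host123,type=hwthread,type-id=0 value=1.23 1694337600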

View File

@@ -0,0 +1,446 @@
package memorystore
import (
"context"
"errors"
"log"
"os"
"os/signal"
"runtime"
"sync"
"syscall"
"time"
"github.com/ClusterCockpit/cc-backend/internal/avro"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/resampler"
"github.com/ClusterCockpit/cc-lib/runtimeEnv"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/util"
)
var (
singleton sync.Once
msInstance *MemoryStore
)
var Clusters = make([]string, 0)
var NumWorkers int = 4
func init() {
maxWorkers := 10
NumWorkers = runtime.NumCPU()/2 + 1
if NumWorkers > maxWorkers {
NumWorkers = maxWorkers
}
}
type Metric struct {
Name string
Value schema.Float
MetricConfig config.MetricConfig
}
type MemoryStore struct {
Metrics map[string]config.MetricConfig
root Level
}
func Init(wg *sync.WaitGroup) {
startupTime := time.Now()
// Pass the config.MetricStoreKeys
InitMetrics(config.Metrics)
ms := GetMemoryStore()
d, err := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Restore)
if err != nil {
log.Fatal(err)
}
restoreFrom := startupTime.Add(-d)
log.Printf("[METRICSTORE]> Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339))
files, err := ms.FromCheckpointFiles(config.MetricStoreKeys.Checkpoints.RootDir, restoreFrom.Unix())
loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB
if err != nil {
log.Fatalf("[METRICSTORE]> Loading checkpoints failed: %s\n", err.Error())
} else {
log.Printf("[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds())
}
// Try to use less memory by forcing a GC run here and then
// lowering the target percentage. The default of 100 means
// that a GC is only triggered once new allocations exceed
// the size of the previously active heap.
// Forcing a GC here will set the "previously active heap"
// to a minimum.
runtime.GC()
ctx, shutdown := context.WithCancel(context.Background())
wg.Add(4)
Retention(wg, ctx)
Checkpointing(wg, ctx)
Archiving(wg, ctx)
avro.DataStaging(wg, ctx)
wg.Add(1)
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
go func() {
defer wg.Done()
<-sigs
runtimeEnv.SystemdNotifiy(false, "[METRICSTORE]> Shutting down ...")
shutdown()
}()
if config.MetricStoreKeys.Nats != nil {
for _, natsConf := range config.MetricStoreKeys.Nats {
// TODO: When multiple nats configs share a URL, do a single connect.
wg.Add(1)
nc := natsConf
go func() {
// err := ReceiveNats(conf.Nats, decodeLine, runtime.NumCPU()-1, ctx)
err := ReceiveNats(nc, ms, 1, ctx)
if err != nil {
log.Fatal(err)
}
wg.Done()
}()
}
}
}
// Create a new, initialized instance of a MemoryStore.
// Will panic if values in the metric configurations are invalid.
func InitMetrics(metrics map[string]config.MetricConfig) {
singleton.Do(func() {
offset := 0
for key, cfg := range metrics {
if cfg.Frequency == 0 {
panic("[METRICSTORE]> invalid frequency")
}
metrics[key] = config.MetricConfig{
Frequency: cfg.Frequency,
Aggregation: cfg.Aggregation,
Offset: offset,
}
offset += 1
}
msInstance = &MemoryStore{
root: Level{
metrics: make([]*buffer, len(metrics)),
children: make(map[string]*Level),
},
Metrics: metrics,
}
})
}
func GetMemoryStore() *MemoryStore {
if msInstance == nil {
log.Fatalf("[METRICSTORE]> MemoryStore not initialized!")
}
return msInstance
}
func Shutdown() {
log.Printf("[METRICSTORE]> Writing to '%s'...\n", config.MetricStoreKeys.Checkpoints.RootDir)
var files int
var err error
ms := GetMemoryStore()
if config.MetricStoreKeys.Checkpoints.FileFormat == "json" {
files, err = ms.ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
} else {
files, err = avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, true)
close(avro.LineProtocolMessages)
}
if err != nil {
log.Printf("[METRICSTORE]> Writing checkpoint failed: %s\n", err.Error())
}
log.Printf("[METRICSTORE]> Done! (%d files written)\n", files)
// ms.PrintHeirarchy()
}
// func (m *MemoryStore) PrintHeirarchy() {
// m.root.lock.Lock()
// defer m.root.lock.Unlock()
// fmt.Printf("Root : \n")
// for lvl1, sel1 := range m.root.children {
// fmt.Printf("\t%s\n", lvl1)
// for lvl2, sel2 := range sel1.children {
// fmt.Printf("\t\t%s\n", lvl2)
// if lvl1 == "fritz" && lvl2 == "f0201" {
// for name, met := range m.Metrics {
// mt := sel2.metrics[met.Offset]
// fmt.Printf("\t\t\t\t%s\n", name)
// fmt.Printf("\t\t\t\t")
// for mt != nil {
// // if name == "cpu_load" {
// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
// // }
// mt = mt.prev
// }
// fmt.Printf("\n")
// }
// }
// for lvl3, sel3 := range sel2.children {
// if lvl1 == "fritz" && lvl2 == "f0201" && lvl3 == "hwthread70" {
// fmt.Printf("\t\t\t\t\t%s\n", lvl3)
// for name, met := range m.Metrics {
// mt := sel3.metrics[met.Offset]
// fmt.Printf("\t\t\t\t\t\t%s\n", name)
// fmt.Printf("\t\t\t\t\t\t")
// for mt != nil {
// // if name == "clock" {
// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
// mt = mt.prev
// }
// fmt.Printf("\n")
// }
// // for i, _ := range sel3.metrics {
// // fmt.Printf("\t\t\t\t\t%s\n", getName(configmetrics, i))
// // }
// }
// }
// }
// }
// }
func getName(m *MemoryStore, i int) string {
for key, val := range m.Metrics {
if val.Offset == i {
return key
}
}
return ""
}
func Retention(wg *sync.WaitGroup, ctx context.Context) {
ms := GetMemoryStore()
go func() {
defer wg.Done()
d, err := time.ParseDuration(config.MetricStoreKeys.RetentionInMemory)
if err != nil {
log.Fatal(err)
}
if d <= 0 {
return
}
ticks := func() <-chan time.Time {
d := d / 2
if d <= 0 {
return nil
}
return time.NewTicker(d).C
}()
for {
select {
case <-ctx.Done():
return
case <-ticks:
t := time.Now().Add(-d)
log.Printf("[METRICSTORE]> start freeing buffers (older than %s)...\n", t.Format(time.RFC3339))
freed, err := ms.Free(nil, t.Unix())
if err != nil {
log.Printf("[METRICSTORE]> freeing up buffers failed: %s\n", err.Error())
} else {
log.Printf("[METRICSTORE]> done: %d buffers freed\n", freed)
}
}
}
}()
}
// Write all values in `metrics` to the level specified by `selector` for time `ts`.
// Look at `findLevelOrCreate` for how selectors work.
func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error {
var ok bool
for i, metric := range metrics {
if metric.MetricConfig.Frequency == 0 {
metric.MetricConfig, ok = m.Metrics[metric.Name]
if !ok {
metric.MetricConfig.Frequency = 0
}
metrics[i] = metric
}
}
return m.WriteToLevel(&m.root, selector, ts, metrics)
}
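// Usage sketch (hypothetical values; ts must be a Unix timestamp in seconds and
// "cpu_load" must exist in ms.Metrics):
//
//	err := ms.Write([]string{"emmy", "host123"}, time.Now().Unix(),
//		[]Metric{{Name: "cpu_load", Value: schema.Float(1.23)}})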
func (m *MemoryStore) GetLevel(selector []string) *Level {
return m.root.findLevelOrCreate(selector, len(m.Metrics))
}
// Assumes that `minfo` in `metrics` is filled in!
func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error {
l = l.findLevelOrCreate(selector, len(m.Metrics))
l.lock.Lock()
defer l.lock.Unlock()
for _, metric := range metrics {
if metric.MetricConfig.Frequency == 0 {
continue
}
b := l.metrics[metric.MetricConfig.Offset]
if b == nil {
// First write to this metric and level
b = newBuffer(ts, metric.MetricConfig.Frequency)
l.metrics[metric.MetricConfig.Offset] = b
}
nb, err := b.write(ts, metric.Value)
if err != nil {
return err
}
// Last write created a new buffer...
if b != nb {
l.metrics[metric.MetricConfig.Offset] = nb
}
}
return nil
}
// Returns all values for metric `metric` from `from` to `to` for the selected level(s).
// If the level does not hold the metric itself, the data will be aggregated recursively from the children.
// The second and third return values are the actual from/to for the data. Those can be different from
// the range asked for if no data was available.
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {
if from > to {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range\n")
}
minfo, ok := m.Metrics[metric]
if !ok {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> unkown metric: \n" + metric)
}
n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)
err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error {
cdata, cfrom, cto, err := b.read(from, to, data)
if err != nil {
return err
}
if n == 0 {
from, to = cfrom, cto
} else if from != cfrom || to != cto || len(data) != len(cdata) {
missingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)
if missingfront != 0 {
return ErrDataDoesNotAlign
}
newlen := len(cdata) - missingback
if newlen < 1 {
return ErrDataDoesNotAlign
}
cdata = cdata[0:newlen]
if len(cdata) != len(data) {
return ErrDataDoesNotAlign
}
from, to = cfrom, cto
}
data = cdata
n += 1
return nil
})
if err != nil {
return nil, 0, 0, 0, err
} else if n == 0 {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found\n")
} else if n > 1 {
if minfo.Aggregation == config.AvgAggregation {
normalize := 1. / schema.Float(n)
for i := 0; i < len(data); i++ {
data[i] *= normalize
}
} else if minfo.Aggregation != config.SumAggregation {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid aggregation")
}
}
data, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)
if err != nil {
return nil, 0, 0, 0, err
}
return data, from, to, resolution, nil
}
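// Usage sketch (an assumption: util.Selector is a slice of elements addressed by
// their String field, as findBuffers suggests; resolution is in seconds):
//
//	sel := util.Selector{{String: "emmy"}, {String: "host123"}}
//	data, from, to, res, err := ms.Read(sel, "cpu_load", from, to, 60)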
// Release all buffers for the selected level and all its children that contain only
// values older than `t`.
func (m *MemoryStore) Free(selector []string, t int64) (int, error) {
return m.GetLevel(selector).free(t)
}
func (m *MemoryStore) FreeAll() error {
for k := range m.root.children {
delete(m.root.children, k)
}
return nil
}
func (m *MemoryStore) SizeInBytes() int64 {
return m.root.sizeInBytes()
}
// Given a selector, return a list of all children of the level selected.
func (m *MemoryStore) ListChildren(selector []string) []string {
lvl := &m.root
for lvl != nil && len(selector) != 0 {
lvl.lock.RLock()
next := lvl.children[selector[0]]
lvl.lock.RUnlock()
lvl = next
selector = selector[1:]
}
if lvl == nil {
return nil
}
lvl.lock.RLock()
defer lvl.lock.RUnlock()
children := make([]string, 0, len(lvl.children))
for child := range lvl.children {
children = append(children, child)
}
return children
}

View File

@@ -0,0 +1,120 @@
package memorystore
import (
"errors"
"math"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/util"
)
type Stats struct {
Samples int
Avg util.Float
Min util.Float
Max util.Float
}
func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) {
if from < b.start {
if b.prev != nil {
return b.prev.stats(from, to)
}
from = b.start
}
// TODO: Check if b.closed and if so and the full buffer is queried,
// use b.statistics instead of iterating over the buffer.
samples := 0
sum, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
var t int64
for t = from; t < to; t += b.frequency {
idx := int((t - b.start) / b.frequency)
if idx >= cap(b.data) {
b = b.next
if b == nil {
break
}
idx = 0
}
if t < b.start || idx >= len(b.data) {
continue
}
xf := float64(b.data[idx])
if math.IsNaN(xf) {
continue
}
samples += 1
sum += xf
min = math.Min(min, xf)
max = math.Max(max, xf)
}
return Stats{
Samples: samples,
Avg: util.Float(sum) / util.Float(samples),
Min: util.Float(min),
Max: util.Float(max),
}, from, t, nil
}
// Returns statistics for the requested metric on the selected node/level.
// Data is aggregated to the selected level the same way as in `MemoryStore.Read`.
// If `Stats.Samples` is zero, the statistics should not be considered valid.
func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int64) (*Stats, int64, int64, error) {
if from > to {
return nil, 0, 0, errors.New("invalid time range")
}
minfo, ok := m.Metrics[metric]
if !ok {
return nil, 0, 0, errors.New("unkown metric: " + metric)
}
n, samples := 0, 0
avg, min, max := util.Float(0), math.MaxFloat32, -math.MaxFloat32
err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error {
stats, cfrom, cto, err := b.stats(from, to)
if err != nil {
return err
}
if n == 0 {
from, to = cfrom, cto
} else if from != cfrom || to != cto {
return ErrDataDoesNotAlign
}
samples += stats.Samples
avg += stats.Avg
min = math.Min(min, float64(stats.Min))
max = math.Max(max, float64(stats.Max))
n += 1
return nil
})
if err != nil {
return nil, 0, 0, err
}
if n == 0 {
return nil, 0, 0, ErrNoData
}
if minfo.Aggregation == config.AvgAggregation {
avg /= util.Float(n)
} else if n > 1 && minfo.Aggregation != config.SumAggregation {
return nil, 0, 0, errors.New("invalid aggregation")
}
return &Stats{
Samples: samples,
Avg: avg,
Min: util.Float(min),
Max: util.Float(max),
}, from, to, nil
}

View File

@@ -91,14 +91,14 @@ func LoadData(job *schema.Job,
// Pass the resolution from frontend here. // Pass the resolution from frontend here.
for _, v := range jd { for _, v := range jd {
for _, v_ := range v { for _, v_ := range v {
timestep := 0 timestep := int64(0)
for i := 0; i < len(v_.Series); i += 1 { for i := 0; i < len(v_.Series); i += 1 {
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution) v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, int64(v_.Timestep), int64(resolution))
if err != nil { if err != nil {
return err, 0, 0 return err, 0, 0
} }
} }
v_.Timestep = timestep v_.Timestep = int(timestep)
} }
} }

View File

@@ -5,23 +5,22 @@
package metricdata package metricdata
import ( import (
"bufio"
"bytes"
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"net/http"
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"time" "time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/memorystore"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger" cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema" "github.com/ClusterCockpit/cc-lib/schema"
) )
// Bloat Code
type CCMetricStoreConfig struct { type CCMetricStoreConfig struct {
Kind string `json:"kind"` Kind string `json:"kind"`
Url string `json:"url"` Url string `json:"url"`
@@ -33,141 +32,16 @@ type CCMetricStoreConfig struct {
Renamings map[string]string `json:"metricRenamings"` Renamings map[string]string `json:"metricRenamings"`
} }
// Bloat Code
type CCMetricStore struct { type CCMetricStore struct {
here2there map[string]string
there2here map[string]string
client http.Client
jwt string
url string
queryEndpoint string
}
type ApiQueryRequest struct {
Cluster string `json:"cluster"`
Queries []ApiQuery `json:"queries"`
ForAllNodes []string `json:"for-all-nodes"`
From int64 `json:"from"`
To int64 `json:"to"`
WithStats bool `json:"with-stats"`
WithData bool `json:"with-data"`
}
type ApiQuery struct {
Type *string `json:"type,omitempty"`
SubType *string `json:"subtype,omitempty"`
Metric string `json:"metric"`
Hostname string `json:"host"`
Resolution int `json:"resolution"`
TypeIds []string `json:"type-ids,omitempty"`
SubTypeIds []string `json:"subtype-ids,omitempty"`
Aggregate bool `json:"aggreg"`
}
type ApiQueryResponse struct {
Queries []ApiQuery `json:"queries,omitempty"`
Results [][]ApiMetricData `json:"results"`
}
type ApiMetricData struct {
Error *string `json:"error"`
Data []schema.Float `json:"data"`
From int64 `json:"from"`
To int64 `json:"to"`
Resolution int `json:"resolution"`
Avg schema.Float `json:"avg"`
Min schema.Float `json:"min"`
Max schema.Float `json:"max"`
} }
// Bloat Code
func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error { func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
var config CCMetricStoreConfig
if err := json.Unmarshal(rawConfig, &config); err != nil {
cclog.Warn("Error while unmarshaling raw json config")
return err
}
ccms.url = config.Url
ccms.queryEndpoint = fmt.Sprintf("%s/api/query", config.Url)
ccms.jwt = config.Token
ccms.client = http.Client{
Timeout: 10 * time.Second,
}
if config.Renamings != nil {
ccms.here2there = config.Renamings
ccms.there2here = make(map[string]string, len(config.Renamings))
for k, v := range ccms.here2there {
ccms.there2here[v] = k
}
} else {
ccms.here2there = make(map[string]string)
ccms.there2here = make(map[string]string)
}
return nil return nil
} }
func (ccms *CCMetricStore) toRemoteName(metric string) string {
if renamed, ok := ccms.here2there[metric]; ok {
return renamed
}
return metric
}
func (ccms *CCMetricStore) toLocalName(metric string) string {
if renamed, ok := ccms.there2here[metric]; ok {
return renamed
}
return metric
}
func (ccms *CCMetricStore) doRequest(
ctx context.Context,
body *ApiQueryRequest,
) (*ApiQueryResponse, error) {
buf := &bytes.Buffer{}
if err := json.NewEncoder(buf).Encode(body); err != nil {
cclog.Errorf("Error while encoding request body: %s", err.Error())
return nil, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf)
if err != nil {
cclog.Errorf("Error while building request body: %s", err.Error())
return nil, err
}
if ccms.jwt != "" {
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
}
// versioning the cc-metric-store query API.
// v2 = data with resampling
// v1 = data without resampling
q := req.URL.Query()
q.Add("version", "v2")
req.URL.RawQuery = q.Encode()
res, err := ccms.client.Do(req)
if err != nil {
cclog.Errorf("Error while performing request: %s", err.Error())
return nil, err
}
if res.StatusCode != http.StatusOK {
return nil, fmt.Errorf("'%s': HTTP Status: %s", ccms.queryEndpoint, res.Status)
}
var resBody ApiQueryResponse
if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil {
cclog.Errorf("Error while decoding result body: %s", err.Error())
return nil, err
}
return &resBody, nil
}
func (ccms *CCMetricStore) LoadData( func (ccms *CCMetricStore) LoadData(
job *schema.Job, job *schema.Job,
metrics []string, metrics []string,
@@ -175,13 +49,13 @@ func (ccms *CCMetricStore) LoadData(
ctx context.Context, ctx context.Context,
resolution int, resolution int,
) (schema.JobData, error) { ) (schema.JobData, error) {
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution) queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, int64(resolution))
if err != nil { if err != nil {
cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error()) cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error())
return nil, err return nil, err
} }
req := ApiQueryRequest{ req := memorystore.ApiQueryRequest{
Cluster: job.Cluster, Cluster: job.Cluster,
From: job.StartTime, From: job.StartTime,
To: job.StartTime + int64(job.Duration), To: job.StartTime + int64(job.Duration),
@@ -190,9 +64,9 @@ func (ccms *CCMetricStore) LoadData(
WithData: true, WithData: true,
} }
resBody, err := ccms.doRequest(ctx, &req) resBody, err := memorystore.FetchData(req)
if err != nil { if err != nil {
cclog.Errorf("Error while performing request: %s", err.Error()) cclog.Errorf("Error while fetching data : %s", err.Error())
return nil, err return nil, err
} }
@@ -200,7 +74,7 @@ func (ccms *CCMetricStore) LoadData(
jobData := make(schema.JobData) jobData := make(schema.JobData)
for i, row := range resBody.Results { for i, row := range resBody.Results {
query := req.Queries[i] query := req.Queries[i]
metric := ccms.toLocalName(query.Metric) metric := query.Metric
scope := assignedScope[i] scope := assignedScope[i]
mc := archive.GetMetricConfig(job.Cluster, metric) mc := archive.GetMetricConfig(job.Cluster, metric)
if _, ok := jobData[metric]; !ok { if _, ok := jobData[metric]; !ok {
@@ -209,7 +83,7 @@ func (ccms *CCMetricStore) LoadData(
res := mc.Timestep res := mc.Timestep
if len(row) > 0 { if len(row) > 0 {
res = row[0].Resolution res = int(row[0].Resolution)
} }
jobMetric, ok := jobData[metric][scope] jobMetric, ok := jobData[metric][scope]
@@ -282,9 +156,9 @@ func (ccms *CCMetricStore) buildQueries(
job *schema.Job, job *schema.Job,
metrics []string, metrics []string,
scopes []schema.MetricScope, scopes []schema.MetricScope,
resolution int, resolution int64,
) ([]ApiQuery, []schema.MetricScope, error) { ) ([]memorystore.ApiQuery, []schema.MetricScope, error) {
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) queries := make([]memorystore.ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
assignedScope := []schema.MetricScope{} assignedScope := []schema.MetricScope{}
subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster) subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster)
@@ -294,7 +168,6 @@ func (ccms *CCMetricStore) buildQueries(
topology := subcluster.Topology topology := subcluster.Topology
for _, metric := range metrics { for _, metric := range metrics {
remoteName := ccms.toRemoteName(metric)
mc := archive.GetMetricConfig(job.Cluster, metric) mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil { if mc == nil {
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster) // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
@@ -306,7 +179,7 @@ func (ccms *CCMetricStore) buildQueries(
if len(mc.SubClusters) != 0 { if len(mc.SubClusters) != 0 {
isRemoved := false isRemoved := false
for _, scConfig := range mc.SubClusters { for _, scConfig := range mc.SubClusters {
if scConfig.Name == job.SubCluster && scConfig.Remove == true { if scConfig.Name == job.SubCluster && scConfig.Remove {
isRemoved = true isRemoved = true
break break
} }
@@ -347,8 +220,8 @@ func (ccms *CCMetricStore) buildQueries(
continue continue
} }
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &acceleratorString, Type: &acceleratorString,
@@ -365,8 +238,8 @@ func (ccms *CCMetricStore) buildQueries(
continue continue
} }
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &acceleratorString, Type: &acceleratorString,
@@ -379,8 +252,8 @@ func (ccms *CCMetricStore) buildQueries(
// HWThread -> HWThead // HWThread -> HWThead
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &hwthreadString, Type: &hwthreadString,
@@ -395,8 +268,8 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {
cores, _ := topology.GetCoresFromHWThreads(hwthreads) cores, _ := topology.GetCoresFromHWThreads(hwthreads)
for _, core := range cores { for _, core := range cores {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &hwthreadString, Type: &hwthreadString,
@@ -412,8 +285,8 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
for _, socket := range sockets { for _, socket := range sockets {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &hwthreadString, Type: &hwthreadString,
@@ -427,8 +300,8 @@ func (ccms *CCMetricStore) buildQueries(
// HWThread -> Node // HWThread -> Node
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &hwthreadString, Type: &hwthreadString,
@@ -442,8 +315,8 @@ func (ccms *CCMetricStore) buildQueries(
// Core -> Core // Core -> Core
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
cores, _ := topology.GetCoresFromHWThreads(hwthreads) cores, _ := topology.GetCoresFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &coreString, Type: &coreString,
@@ -458,8 +331,8 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromCores(hwthreads) sockets, _ := topology.GetSocketsFromCores(hwthreads)
for _, socket := range sockets { for _, socket := range sockets {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &coreString, Type: &coreString,
@@ -474,8 +347,8 @@ func (ccms *CCMetricStore) buildQueries(
// Core -> Node // Core -> Node
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
cores, _ := topology.GetCoresFromHWThreads(hwthreads) cores, _ := topology.GetCoresFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &coreString, Type: &coreString,
@@ -489,8 +362,8 @@ func (ccms *CCMetricStore) buildQueries(
// MemoryDomain -> MemoryDomain // MemoryDomain -> MemoryDomain
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &memoryDomainString, Type: &memoryDomainString,
@@ -504,8 +377,8 @@ func (ccms *CCMetricStore) buildQueries(
// MemoryDomain -> Node // MemoryDomain -> Node
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &memoryDomainString, Type: &memoryDomainString,
@@ -519,8 +392,8 @@ func (ccms *CCMetricStore) buildQueries(
// Socket -> Socket // Socket -> Socket
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &socketString, Type: &socketString,
@@ -534,8 +407,8 @@ func (ccms *CCMetricStore) buildQueries(
// Socket -> Node // Socket -> Node
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &socketString, Type: &socketString,
@@ -548,8 +421,8 @@ func (ccms *CCMetricStore) buildQueries(
// Node -> Node // Node -> Node
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: host.Hostname, Hostname: host.Hostname,
Resolution: resolution, Resolution: resolution,
}) })
@@ -576,7 +449,7 @@ func (ccms *CCMetricStore) LoadStats(
return nil, err return nil, err
} }
req := ApiQueryRequest{ req := memorystore.ApiQueryRequest{
Cluster: job.Cluster, Cluster: job.Cluster,
From: job.StartTime, From: job.StartTime,
To: job.StartTime + int64(job.Duration), To: job.StartTime + int64(job.Duration),
@@ -585,16 +458,16 @@ func (ccms *CCMetricStore) LoadStats(
WithData: false, WithData: false,
} }
resBody, err := ccms.doRequest(ctx, &req) resBody, err := memorystore.FetchData(req)
if err != nil { if err != nil {
cclog.Errorf("Error while performing request: %s", err.Error()) cclog.Errorf("Error while fetching data : %s", err.Error())
return nil, err return nil, err
} }
stats := make(map[string]map[string]schema.MetricStatistics, len(metrics)) stats := make(map[string]map[string]schema.MetricStatistics, len(metrics))
for i, res := range resBody.Results { for i, res := range resBody.Results {
query := req.Queries[i] query := req.Queries[i]
metric := ccms.toLocalName(query.Metric) metric := query.Metric
data := res[0] data := res[0]
if data.Error != nil { if data.Error != nil {
cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
@@ -635,7 +508,7 @@ func (ccms *CCMetricStore) LoadScopedStats(
return nil, err return nil, err
} }
req := ApiQueryRequest{ req := memorystore.ApiQueryRequest{
Cluster: job.Cluster, Cluster: job.Cluster,
From: job.StartTime, From: job.StartTime,
To: job.StartTime + int64(job.Duration), To: job.StartTime + int64(job.Duration),
@@ -644,9 +517,9 @@ func (ccms *CCMetricStore) LoadScopedStats(
WithData: false, WithData: false,
} }
resBody, err := ccms.doRequest(ctx, &req) resBody, err := memorystore.FetchData(req)
if err != nil { if err != nil {
cclog.Errorf("Error while performing request: %s", err.Error()) cclog.Errorf("Error while fetching data : %s", err.Error())
return nil, err return nil, err
} }
@@ -655,7 +528,7 @@ func (ccms *CCMetricStore) LoadScopedStats(
for i, row := range resBody.Results { for i, row := range resBody.Results {
query := req.Queries[i] query := req.Queries[i]
metric := ccms.toLocalName(query.Metric) metric := query.Metric
scope := assignedScope[i] scope := assignedScope[i]
if _, ok := scopedJobStats[metric]; !ok { if _, ok := scopedJobStats[metric]; !ok {
@@ -721,7 +594,7 @@ func (ccms *CCMetricStore) LoadNodeData(
from, to time.Time, from, to time.Time,
ctx context.Context, ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) { ) (map[string]map[string][]*schema.JobMetric, error) {
req := ApiQueryRequest{ req := memorystore.ApiQueryRequest{
Cluster: cluster, Cluster: cluster,
From: from.Unix(), From: from.Unix(),
To: to.Unix(), To: to.Unix(),
@@ -730,38 +603,36 @@ func (ccms *CCMetricStore) LoadNodeData(
} }
if nodes == nil { if nodes == nil {
for _, metric := range metrics { req.ForAllNodes = append(req.ForAllNodes, metrics...)
req.ForAllNodes = append(req.ForAllNodes, ccms.toRemoteName(metric))
}
} else { } else {
for _, node := range nodes { for _, node := range nodes {
for _, metric := range metrics { for _, metric := range metrics {
req.Queries = append(req.Queries, ApiQuery{ req.Queries = append(req.Queries, memorystore.ApiQuery{
Hostname: node, Hostname: node,
Metric: ccms.toRemoteName(metric), Metric: metric,
Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution
}) })
} }
} }
} }
resBody, err := ccms.doRequest(ctx, &req) resBody, err := memorystore.FetchData(req)
if err != nil { if err != nil {
cclog.Errorf("Error while performing request: %s", err.Error()) cclog.Errorf("Error while fetching data : %s", err.Error())
return nil, err return nil, err
} }
var errors []string var errors []string
data := make(map[string]map[string][]*schema.JobMetric) data := make(map[string]map[string][]*schema.JobMetric)
for i, res := range resBody.Results { for i, res := range resBody.Results {
var query ApiQuery var query memorystore.ApiQuery
if resBody.Queries != nil { if resBody.Queries != nil {
query = resBody.Queries[i] query = resBody.Queries[i]
} else { } else {
query = req.Queries[i] query = req.Queries[i]
} }
metric := ccms.toLocalName(query.Metric) metric := query.Metric
qdata := res[0] qdata := res[0]
if qdata.Error != nil { if qdata.Error != nil {
/* Build list for "partial errors", if any */ /* Build list for "partial errors", if any */
@@ -861,13 +732,13 @@ func (ccms *CCMetricStore) LoadNodeListData(
// Note: Order of node data is not guaranteed after this point, but contents match page and filter criteria // Note: Order of node data is not guaranteed after this point, but contents match page and filter criteria
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution) queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution))
if err != nil { if err != nil {
cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error()) cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error())
return nil, totalNodes, hasNextPage, err return nil, totalNodes, hasNextPage, err
} }
req := ApiQueryRequest{ req := memorystore.ApiQueryRequest{
Cluster: cluster, Cluster: cluster,
Queries: queries, Queries: queries,
From: from.Unix(), From: from.Unix(),
@@ -876,29 +747,29 @@ func (ccms *CCMetricStore) LoadNodeListData(
WithData: true, WithData: true,
} }
resBody, err := ccms.doRequest(ctx, &req) resBody, err := memorystore.FetchData(req)
if err != nil { if err != nil {
cclog.Errorf("Error while performing request: %s", err.Error()) cclog.Errorf("Error while fetching data : %s", err.Error())
return nil, totalNodes, hasNextPage, err return nil, totalNodes, hasNextPage, err
} }
var errors []string var errors []string
data := make(map[string]schema.JobData) data := make(map[string]schema.JobData)
for i, row := range resBody.Results { for i, row := range resBody.Results {
var query ApiQuery var query memorystore.ApiQuery
if resBody.Queries != nil { if resBody.Queries != nil {
query = resBody.Queries[i] query = resBody.Queries[i]
} else { } else {
query = req.Queries[i] query = req.Queries[i]
} }
// qdata := res[0] // qdata := res[0]
metric := ccms.toLocalName(query.Metric) metric := query.Metric
scope := assignedScope[i] scope := assignedScope[i]
mc := archive.GetMetricConfig(cluster, metric) mc := archive.GetMetricConfig(cluster, metric)
res := mc.Timestep res := mc.Timestep
if len(row) > 0 { if len(row) > 0 {
res = row[0].Resolution res = int(row[0].Resolution)
} }
// Init Nested Map Data Structures If Not Found // Init Nested Map Data Structures If Not Found
@@ -971,9 +842,9 @@ func (ccms *CCMetricStore) buildNodeQueries(
nodes []string, nodes []string,
metrics []string, metrics []string,
scopes []schema.MetricScope, scopes []schema.MetricScope,
resolution int, resolution int64,
) ([]ApiQuery, []schema.MetricScope, error) { ) ([]memorystore.ApiQuery, []schema.MetricScope, error) {
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes)) queries := make([]memorystore.ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
assignedScope := []schema.MetricScope{} assignedScope := []schema.MetricScope{}
// Get Topol before loop if subCluster given // Get Topol before loop if subCluster given
@@ -988,7 +859,7 @@ func (ccms *CCMetricStore) buildNodeQueries(
} }
for _, metric := range metrics { for _, metric := range metrics {
remoteName := ccms.toRemoteName(metric) metric := metric
mc := archive.GetMetricConfig(cluster, metric) mc := archive.GetMetricConfig(cluster, metric)
if mc == nil { if mc == nil {
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster) // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster)
@@ -1000,7 +871,7 @@ func (ccms *CCMetricStore) buildNodeQueries(
if mc.SubClusters != nil { if mc.SubClusters != nil {
isRemoved := false isRemoved := false
for _, scConfig := range mc.SubClusters { for _, scConfig := range mc.SubClusters {
if scConfig.Name == subCluster && scConfig.Remove == true { if scConfig.Name == subCluster && scConfig.Remove {
isRemoved = true isRemoved = true
break break
} }
@@ -1056,8 +927,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
continue continue
} }
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: false, Aggregate: false,
Type: &acceleratorString, Type: &acceleratorString,
@@ -1074,8 +945,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
continue continue
} }
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: true, Aggregate: true,
Type: &acceleratorString, Type: &acceleratorString,
@@ -1088,8 +959,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
// HWThread -> HWThread // HWThread -> HWThread
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: false, Aggregate: false,
Type: &hwthreadString, Type: &hwthreadString,
@@ -1104,8 +975,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {
cores, _ := topology.GetCoresFromHWThreads(topology.Node) cores, _ := topology.GetCoresFromHWThreads(topology.Node)
for _, core := range cores { for _, core := range cores {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: true, Aggregate: true,
Type: &hwthreadString, Type: &hwthreadString,
@@ -1121,8 +992,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
for _, socket := range sockets { for _, socket := range sockets {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: true, Aggregate: true,
Type: &hwthreadString, Type: &hwthreadString,
@@ -1136,8 +1007,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
// HWThread -> Node // HWThread -> Node
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: true, Aggregate: true,
Type: &hwthreadString, Type: &hwthreadString,
@@ -1151,8 +1022,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
// Core -> Core // Core -> Core
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
cores, _ := topology.GetCoresFromHWThreads(topology.Node) cores, _ := topology.GetCoresFromHWThreads(topology.Node)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: false, Aggregate: false,
Type: &coreString, Type: &coreString,
@@ -1167,8 +1038,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromCores(topology.Node) sockets, _ := topology.GetSocketsFromCores(topology.Node)
for _, socket := range sockets { for _, socket := range sockets {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: true, Aggregate: true,
Type: &coreString, Type: &coreString,
@@ -1183,8 +1054,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
// Core -> Node // Core -> Node
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
cores, _ := topology.GetCoresFromHWThreads(topology.Node) cores, _ := topology.GetCoresFromHWThreads(topology.Node)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: true, Aggregate: true,
Type: &coreString, Type: &coreString,
@@ -1198,8 +1069,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
// MemoryDomain -> MemoryDomain // MemoryDomain -> MemoryDomain
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: false, Aggregate: false,
Type: &memoryDomainString, Type: &memoryDomainString,
@@ -1213,8 +1084,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
// MemoryDomain -> Node // MemoryDomain -> Node
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: true, Aggregate: true,
Type: &memoryDomainString, Type: &memoryDomainString,
@@ -1228,8 +1099,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
// Socket -> Socket // Socket -> Socket
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: false, Aggregate: false,
Type: &socketString, Type: &socketString,
@@ -1243,8 +1114,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
// Socket -> Node // Socket -> Node
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Aggregate: true, Aggregate: true,
Type: &socketString, Type: &socketString,
@@ -1257,8 +1128,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
// Node -> Node // Node -> Node
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
queries = append(queries, ApiQuery{ queries = append(queries, memorystore.ApiQuery{
Metric: remoteName, Metric: metric,
Hostname: hostname, Hostname: hostname,
Resolution: resolution, Resolution: resolution,
}) })
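The repeated pattern in this file is the same for every scope pairing: the adapter no longer translates metric names to a remote name and issues an HTTP request, it fills memorystore.ApiQuery values with the plain metric name and hands the whole batch to the embedded store. A minimal, self-contained sketch of that shape follows; the field names (Metric, Hostname, Aggregate, Type, Resolution) are the ones visible in this diff, while the local struct definitions and the example values are simplified stand-ins rather than the real internal/memorystore types.

package main

import "fmt"

// Simplified stand-ins for the request shapes used in this diff. The real types
// live in internal/memorystore and carry additional fields.
type ApiQuery struct {
	Metric     string
	Hostname   string
	Aggregate  bool
	Type       *string
	Resolution int64
}

type ApiQueryRequest struct {
	Cluster  string
	From     int64
	To       int64
	WithData bool
	Queries  []ApiQuery
}

// buildNodeQuery mirrors the "Node -> Node" case above: no aggregation and the
// metric addressed by its plain (local) name.
func buildNodeQuery(metric, hostname string, resolution int64) ApiQuery {
	return ApiQuery{Metric: metric, Hostname: hostname, Resolution: resolution}
}

func main() {
	req := ApiQueryRequest{
		Cluster:  "emmy",
		From:     1608923076,
		To:       1609009562,
		WithData: true,
		Queries: []ApiQuery{
			buildNodeQuery("cpu_load", "e0102", 60),
			buildNodeQuery("mem_bw", "e0102", 60),
		},
	}
	for _, q := range req.Queries {
		fmt.Printf("query %s@%s (resolution %ds)\n", q.Metric, q.Hostname, q.Resolution)
	}
}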

View File

@@ -74,9 +74,8 @@ func (tmdr *TestMetricDataRepository) LoadNodeListData(
} }
func DeepCopy(jd_temp schema.JobData) schema.JobData { func DeepCopy(jd_temp schema.JobData) schema.JobData {
var jd schema.JobData
jd = make(schema.JobData, len(jd_temp)) jd := make(schema.JobData, len(jd_temp))
for k, v := range jd_temp { for k, v := range jd_temp {
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k])) jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
for k_, v_ := range v { for k_, v_ := range v {

View File

@@ -52,18 +52,18 @@ func GetJobRepository() *JobRepository {
} }
var jobColumns []string = []string{ var jobColumns []string = []string{
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.id", "job.job_id", "job.hpc_user", "job.project", "job.hpc_cluster", "job.subcluster",
"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes", "job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
"job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.num_hwthreads", "job.num_acc", "job.shared", "job.monitoring_status",
"job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources", "job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources",
"job.footprint", "job.energy", "job.footprint", "job.energy",
} }
var jobCacheColumns []string = []string{ var jobCacheColumns []string = []string{
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster", "job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.hpc_cluster",
"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition", "job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
"job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads", "job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads",
"job_cache.num_acc", "job_cache.exclusive", "job_cache.monitoring_status", "job_cache.smt", "job_cache.num_acc", "job_cache.shared", "job_cache.monitoring_status", "job_cache.smt",
"job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources", "job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources",
"job_cache.footprint", "job_cache.energy", "job_cache.footprint", "job_cache.energy",
} }
@@ -390,7 +390,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
start := time.Now() start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) { partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) {
parts := []string{} parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil { if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.hpc_cluster = ?;`, cluster); err != nil {
return nil, 0, 1000 return nil, 0, 1000
} }
@@ -410,7 +410,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
subclusters := make(map[string]map[string]int) subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job"). rows, err := sq.Select("resources", "subcluster").From("job").
Where("job.job_state = 'running'"). Where("job.job_state = 'running'").
Where("job.cluster = ?", cluster). Where("job.hpc_cluster = ?", cluster).
RunWith(r.stmtCache).Query() RunWith(r.stmtCache).Query()
if err != nil { if err != nil {
cclog.Error("Error while running query") cclog.Error("Error while running query")
@@ -505,7 +505,7 @@ func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) {
// FIXME: Reconsider filtering short jobs with hardcoded threshold // FIXME: Reconsider filtering short jobs with hardcoded threshold
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) { func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job"). query := sq.Select(jobColumns...).From("job").
Where(fmt.Sprintf("job.cluster = '%s'", cluster)). Where(fmt.Sprintf("job.hpc_cluster = '%s'", cluster)).
Where("job.job_state = 'running'"). Where("job.job_state = 'running'").
Where("job.duration > 600") Where("job.duration > 600")
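Because the job table column was renamed from cluster to hpc_cluster, every squirrel builder in the repository now references the new name. A standalone sketch of the resulting SQL follows; it uses the same github.com/Masterminds/squirrel builder as the code above, and the cluster name "emmy" is only an example taken from the test fixtures.

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	// job.cluster became job.hpc_cluster; the builder otherwise stays the same.
	query := sq.Select("job.id", "job.job_id", "job.hpc_cluster").
		From("job").
		Where("job.hpc_cluster = ?", "emmy").
		Where("job.job_state = 'running'").
		Where("job.duration > 600")

	sql, args, err := query.ToSql()
	if err != nil {
		panic(err)
	}
	fmt.Println(sql)  // SELECT job.id, job.job_id, job.hpc_cluster FROM job WHERE job.hpc_cluster = ? AND ...
	fmt.Println(args) // [emmy]
}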

View File

@@ -14,19 +14,19 @@ import (
) )
const NamedJobCacheInsert string = `INSERT INTO job_cache ( const NamedJobCacheInsert string = `INSERT INTO job_cache (
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES ( ) VALUES (
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc, :job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data :shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);` );`
const NamedJobInsert string = `INSERT INTO job ( const NamedJobInsert string = `INSERT INTO job (
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES ( ) VALUES (
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc, :job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data :shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);` );`
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) { func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
@@ -70,7 +70,7 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
} }
_, err = r.DB.Exec( _, err = r.DB.Exec(
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache") "INSERT INTO job (job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
if err != nil { if err != nil {
cclog.Warnf("Error while Job sync: %v", err) cclog.Warnf("Error while Job sync: %v", err)
return nil, err return nil, err
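The named insert statements above keep their shape but switch to the hpc_cluster and shared columns. Below is a trimmed, hypothetical variant with only a handful of columns, assuming the sqlx-style named parameters used in this file and an in-memory SQLite database purely for illustration; the example values come from the demo job fixtures.

package main

import (
	"fmt"

	"github.com/jmoiron/sqlx"
	_ "github.com/mattn/go-sqlite3"
)

// A trimmed-down variant of the insert above: only a few columns, but the same
// named-parameter style and the renamed hpc_cluster / shared columns.
const demoInsert = `INSERT INTO job_cache (
	job_id, hpc_user, project, hpc_cluster, subcluster, shared, job_state, start_time
) VALUES (
	:job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :shared, :job_state, :start_time
);`

func main() {
	db, err := sqlx.Open("sqlite3", ":memory:")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// Minimal table so the named insert can run.
	db.MustExec(`CREATE TABLE job_cache (
		job_id BIGINT, hpc_user TEXT, project TEXT, hpc_cluster TEXT,
		subcluster TEXT, shared TEXT, job_state TEXT, start_time BIGINT)`)

	res, err := db.NamedExec(demoInsert, map[string]any{
		"job_id": 1403244, "hpc_user": "emmyUser6", "project": "no project",
		"hpc_cluster": "emmy", "subcluster": "haswell", "shared": "none",
		"job_state": "completed", "start_time": 1608923076,
	})
	if err != nil {
		panic(err)
	}
	id, _ := res.LastInsertId()
	fmt.Println("inserted row id:", id)
}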

View File

@@ -31,7 +31,7 @@ func (r *JobRepository) Find(
Where("job.job_id = ?", *jobId) Where("job.job_id = ?", *jobId)
if cluster != nil { if cluster != nil {
q = q.Where("job.cluster = ?", *cluster) q = q.Where("job.hpc_cluster = ?", *cluster)
} }
if startTime != nil { if startTime != nil {
q = q.Where("job.start_time = ?", *startTime) q = q.Where("job.start_time = ?", *startTime)
@@ -52,7 +52,7 @@ func (r *JobRepository) FindCached(
Where("job_cache.job_id = ?", *jobId) Where("job_cache.job_id = ?", *jobId)
if cluster != nil { if cluster != nil {
q = q.Where("job_cache.cluster = ?", *cluster) q = q.Where("job_cache.hpc_cluster = ?", *cluster)
} }
if startTime != nil { if startTime != nil {
q = q.Where("job_cache.start_time = ?", *startTime) q = q.Where("job_cache.start_time = ?", *startTime)
@@ -78,7 +78,7 @@ func (r *JobRepository) FindAll(
Where("job.job_id = ?", *jobId) Where("job.job_id = ?", *jobId)
if cluster != nil { if cluster != nil {
q = q.Where("job.cluster = ?", *cluster) q = q.Where("job.hpc_cluster = ?", *cluster)
} }
if startTime != nil { if startTime != nil {
q = q.Where("job.start_time = ?", *startTime) q = q.Where("job.start_time = ?", *startTime)
@@ -183,7 +183,7 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
q := sq.Select(jobColumns...). q := sq.Select(jobColumns...).
From("job"). From("job").
Where("job.job_id = ?", jobId). Where("job.job_id = ?", jobId).
Where("job.cluster = ?", cluster). Where("job.hpc_cluster = ?", cluster).
Where("job.start_time = ?", startTime) Where("job.start_time = ?", startTime)
q, qerr := SecurityCheck(ctx, q) q, qerr := SecurityCheck(ctx, q)
@@ -203,7 +203,7 @@ func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cl
From("job"). From("job").
Where("job.job_id = ?", jobId). Where("job.job_id = ?", jobId).
Where("job.hpc_user = ?", user). Where("job.hpc_user = ?", user).
Where("job.cluster = ?", cluster). Where("job.hpc_cluster = ?", cluster).
Where("job.start_time = ?", startTime) Where("job.start_time = ?", startTime)
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow()) _, err := scanJob(q.RunWith(r.stmtCache).QueryRow())

View File

@@ -168,7 +168,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
query = buildMetaJsonCondition("jobName", filter.JobName, query) query = buildMetaJsonCondition("jobName", filter.JobName, query)
} }
if filter.Cluster != nil { if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query) query = buildStringCondition("job.hpc_cluster", filter.Cluster, query)
} }
if filter.Partition != nil { if filter.Partition != nil {
query = buildStringCondition("job.cluster_partition", filter.Partition, query) query = buildStringCondition("job.cluster_partition", filter.Partition, query)
@@ -183,8 +183,8 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs. now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs.
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor) query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
} }
if filter.Exclusive != nil { if filter.Shared != nil {
query = query.Where("job.exclusive = ?", *filter.Exclusive) query = query.Where("job.shared = ?", *filter.Shared)
} }
if filter.State != nil { if filter.State != nil {
states := make([]string, len(filter.State)) states := make([]string, len(filter.State))
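The filter moves from an integer exclusive flag to the shared string enum, so the WHERE clause is now built from a string pointer. A small hypothetical helper in the same style is sketched below; applySharedFilter is not part of the codebase, it only mirrors the condition added above.

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

// Hypothetical helper mirroring the change above: the filter now carries the
// shared enum ("none", "single_user", "multi_user") instead of an exclusive int.
func applySharedFilter(q sq.SelectBuilder, shared *string) sq.SelectBuilder {
	if shared != nil {
		q = q.Where("job.shared = ?", *shared)
	}
	return q
}

func main() {
	shared := "none" // an exclusive job in the new vocabulary
	q := applySharedFilter(sq.Select("job.id").From("job"), &shared)
	sql, args, _ := q.ToSql()
	fmt.Println(sql, args)
}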

View File

@@ -3,7 +3,7 @@ CREATE TABLE "job_cache" (
job_id BIGINT NOT NULL, job_id BIGINT NOT NULL,
hpc_cluster VARCHAR(255) NOT NULL, hpc_cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL, subcluster VARCHAR(255) NOT NULL,
submit_time BIGINT NOT NULL, -- Unix timestamp submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
hpc_user VARCHAR(255) NOT NULL, hpc_user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL, project VARCHAR(255) NOT NULL,
@@ -30,7 +30,7 @@ CREATE TABLE "job_cache" (
energy REAL NOT NULL DEFAULT 0.0, energy REAL NOT NULL DEFAULT 0.0,
energy_footprint TEXT DEFAULT NULL, energy_footprint TEXT DEFAULT NULL,
footprint TEXT DEFAULT NULL, footprint TEXT DEFAULT NULL,
UNIQUE (job_id, cluster, start_time) UNIQUE (job_id, hpc_cluster, start_time)
); );
CREATE TABLE "job_new" ( CREATE TABLE "job_new" (
@@ -65,10 +65,33 @@ CREATE TABLE "job_new" (
energy REAL NOT NULL DEFAULT 0.0, energy REAL NOT NULL DEFAULT 0.0,
energy_footprint TEXT DEFAULT NULL, energy_footprint TEXT DEFAULT NULL,
footprint TEXT DEFAULT NULL, footprint TEXT DEFAULT NULL,
UNIQUE (job_id, cluster, start_time) UNIQUE (job_id, hpc_cluster, start_time)
); );
ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster; ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster;
INSERT INTO job_new SELECT * FROM job;
CREATE TABLE IF NOT EXISTS lookup_exclusive (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
INSERT INTO lookup_exclusive (id, name) VALUES
(0, 'multi_user'),
(1, 'none'),
(2, 'single_user');
INSERT INTO job_new (
id, job_id, hpc_cluster, subcluster, submit_time, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
num_nodes, num_hwthreads, num_acc, smt, shared, monitoring_status, energy,
energy_footprint, footprint
) SELECT
id, job_id, hpc_cluster, subcluster, 0, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
num_nodes, num_hwthreads, num_acc, smt, (SELECT name FROM lookup_exclusive WHERE id=job.exclusive), monitoring_status, energy,
energy_footprint, footprint
FROM job;
DROP TABLE lookup_exclusive;
DROP TABLE job; DROP TABLE job;
ALTER TABLE job_new RENAME TO job; ALTER TABLE job_new RENAME TO job;
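The temporary lookup_exclusive table encodes how the old integer flag is translated while rows are copied into job_new. The same mapping expressed in Go, for illustration only: 1 meant a fully exclusive job and becomes "none", while 0 and 2 were the shared modes.

package main

import "fmt"

// Mapping used by the migration's lookup table above.
var exclusiveToShared = map[int]string{
	0: "multi_user",
	1: "none",
	2: "single_user",
}

func main() {
	for code, name := range exclusiveToShared {
		fmt.Printf("exclusive=%d -> shared=%q\n", code, name)
	}
}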

View File

@@ -21,10 +21,9 @@ import (
// GraphQL validation should make sure that no unknown values can be specified. // GraphQL validation should make sure that no unknown values can be specified.
var groupBy2column = map[model.Aggregate]string{ var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.hpc_user", model.AggregateUser: "job.hpc_user",
model.AggregateProject: "job.project", model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster", model.AggregateCluster: "job.hpc_cluster",
model.AggregateSubcluster: "job.subcluster",
} }
var sortBy2column = map[model.SortByAggregate]string{ var sortBy2column = map[model.SortByAggregate]string{

Binary file not shown.

View File

@@ -1 +1,2 @@
vasp vasp
VASP

View File

@@ -8,7 +8,7 @@
], ],
"metrics": ["cpu_load"], "metrics": ["cpu_load"],
"requirements": [ "requirements": [
"job.exclusive == 1", "job.shared == \"none\"",
"job.duration > job_min_duration_seconds" "job.duration > job_min_duration_seconds"
], ],
"variables": [ "variables": [

View File

@@ -4,7 +4,7 @@
"parameters": ["job_min_duration_seconds"], "parameters": ["job_min_duration_seconds"],
"metrics": ["flops_any", "mem_bw"], "metrics": ["flops_any", "mem_bw"],
"requirements": [ "requirements": [
"job.exclusive == 1", "job.shared == \"none\"",
"job.duration > job_min_duration_seconds" "job.duration > job_min_duration_seconds"
], ],
"variables": [ "variables": [

View File

@@ -8,7 +8,7 @@
], ],
"metrics": ["cpu_load"], "metrics": ["cpu_load"],
"requirements": [ "requirements": [
"job.exclusive == 1", "job.shared == \"none\"",
"job.duration > job_min_duration_seconds" "job.duration > job_min_duration_seconds"
], ],
"variables": [ "variables": [

View File

@@ -26,9 +26,9 @@ func RegisterCommitJobService() {
gocron.NewTask( gocron.NewTask(
func() { func() {
start := time.Now() start := time.Now()
cclog.Printf("Jobcache sync started at %s", start.Format(time.RFC3339)) cclog.Printf("Jobcache sync started at %s\n", start.Format(time.RFC3339))
jobs, _ := jobRepo.SyncJobs() jobs, _ := jobRepo.SyncJobs()
repository.CallJobStartHooks(jobs) repository.CallJobStartHooks(jobs)
cclog.Printf("Jobcache sync and job callbacks are done and took %s", time.Since(start)) cclog.Printf("Jobcache sync and job callbacks are done and took %s\n", time.Since(start))
})) }))
} }

View File

@@ -68,7 +68,7 @@ func Start(cronCfg, archiveConfig json.RawMessage) {
dec := json.NewDecoder(bytes.NewReader(cronCfg)) dec := json.NewDecoder(bytes.NewReader(cronCfg))
dec.DisallowUnknownFields() dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil { if err := dec.Decode(&Keys); err != nil {
cclog.Errorf("error while decoding ldap config: %v", err) cclog.Errorf("error while decoding cron config: %v", err)
} }
var cfg struct { var cfg struct {

View File

@@ -25,8 +25,8 @@ func RegisterUpdateDurationWorker() {
gocron.NewTask( gocron.NewTask(
func() { func() {
start := time.Now() start := time.Now()
cclog.Printf("Update duration started at %s", start.Format(time.RFC3339)) cclog.Printf("Update duration started at %s\n", start.Format(time.RFC3339))
jobRepo.UpdateDuration() jobRepo.UpdateDuration()
cclog.Printf("Update duration is done and took %s", time.Since(start)) cclog.Printf("Update duration is done and took %s\n", time.Since(start))
})) }))
} }

View File

@@ -34,7 +34,7 @@ func RegisterFootprintWorker() {
c := 0 c := 0
ce := 0 ce := 0
cl := 0 cl := 0
cclog.Printf("Update Footprints started at %s", s.Format(time.RFC3339)) cclog.Printf("Update Footprints started at %s\n", s.Format(time.RFC3339))
for _, cluster := range archive.Clusters { for _, cluster := range archive.Clusters {
s_cluster := time.Now() s_cluster := time.Now()
@@ -134,8 +134,8 @@ func RegisterFootprintWorker() {
} }
jobRepo.TransactionEnd(t) jobRepo.TransactionEnd(t)
} }
cclog.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster)) cclog.Debugf("Finish Cluster %s, took %s\n", cluster.Name, time.Since(s_cluster))
} }
cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s)) cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s\n", c, cl, ce, time.Since(s))
})) }))
} }

View File

@@ -8,6 +8,8 @@ import (
"errors" "errors"
"fmt" "fmt"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/memorystore"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger" cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema" "github.com/ClusterCockpit/cc-lib/schema"
) )
@@ -31,6 +33,8 @@ func initClusterConfig() error {
return err return err
} }
memorystore.Clusters = append(memorystore.Clusters, cluster.Name)
if len(cluster.Name) == 0 || if len(cluster.Name) == 0 ||
len(cluster.MetricConfig) == 0 || len(cluster.MetricConfig) == 0 ||
len(cluster.SubClusters) == 0 { len(cluster.SubClusters) == 0 {
@@ -122,6 +126,16 @@ func initClusterConfig() error {
} }
ml.Availability = append(metricLookup[mc.Name].Availability, availability) ml.Availability = append(metricLookup[mc.Name].Availability, availability)
metricLookup[mc.Name] = ml metricLookup[mc.Name] = ml
agg, err := config.AssignAggregationStratergy(mc.Aggregation)
if err != nil {
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
}
config.AddMetric(mc.Name, config.MetricConfig{
Frequency: int64(mc.Timestep),
Aggregation: agg,
})
} }
Clusters = append(Clusters, cluster) Clusters = append(Clusters, cluster)
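Each metric from cluster.json is now also registered with the memory store, carrying its timestep as the frequency and a parsed aggregation strategy. The sketch below mirrors that registration with simplified stand-in types; the enum values and the assignAggregation helper are assumptions, only the overall flow (parse the strategy, then add the metric with Frequency and Aggregation) follows the diff above.

package main

import (
	"fmt"
	"strings"
)

// Stand-ins for the memory-store configuration; the real types live in
// internal/config and internal/memorystore.
type AggregationStrategy int

const (
	NoAggregation AggregationStrategy = iota
	SumAggregation
	AvgAggregation
)

type MetricConfig struct {
	Frequency   int64
	Aggregation AggregationStrategy
}

var metrics = map[string]MetricConfig{}

// assignAggregation plays the role of config.AssignAggregationStratergy above:
// translate the archive's aggregation string into the store's enum.
func assignAggregation(s string) (AggregationStrategy, error) {
	switch strings.ToLower(s) {
	case "sum":
		return SumAggregation, nil
	case "avg":
		return AvgAggregation, nil
	case "":
		return NoAggregation, nil
	default:
		return NoAggregation, fmt.Errorf("unknown aggregation strategy %q", s)
	}
}

func addMetric(name string, cfg MetricConfig) { metrics[name] = cfg }

func main() {
	// Example values; cpu_load with a 60s timestep is typical of the demo clusters.
	agg, err := assignAggregation("avg")
	if err != nil {
		panic(err)
	}
	addMetric("cpu_load", MetricConfig{Frequency: 60, Aggregation: agg})
	fmt.Printf("%+v\n", metrics)
}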

View File

@@ -1,194 +1,194 @@
{ {
"exclusive": 1, "shared": "none",
"jobId": 1403244, "jobId": 1403244,
"statistics": { "statistics": {
"mem_bw": { "mem_bw": {
"avg": 63.57, "avg": 63.57,
"min": 0, "min": 0,
"unit": { "unit": {
"base": "B/s", "base": "B/s",
"prefix": "G" "prefix": "G"
}, },
"max": 74.5 "max": 74.5
},
"rapl_power": {
"avg": 228.07,
"min": 0,
"unit": {
"base": "W"
},
"max": 258.56
},
"ipc": {
"unit": {
"base": "IPC"
},
"max": 0.510204081632653,
"avg": 1.53846153846154,
"min": 0.0
},
"clock": {
"min": 1380.32,
"avg": 2599.39,
"unit": {
"base": "Hz",
"prefix": "M"
},
"max": 2634.46
},
"cpu_load": {
"avg": 18.4,
"min": 0,
"max": 23.58,
"unit": {
"base": "load"
}
},
"flops_any": {
"max": 404.62,
"unit": {
"base": "F/s",
"prefix": "G"
},
"avg": 225.59,
"min": 0
},
"flops_dp": {
"max": 0.24,
"unit": {
"base": "F/s",
"prefix": "G"
},
"min": 0,
"avg": 0
},
"mem_used": {
"min": 1.55,
"avg": 27.84,
"unit": {
"base": "B",
"prefix": "G"
},
"max": 37.5
},
"flops_sp": {
"min": 0,
"avg": 225.59,
"max": 404.62,
"unit": {
"base": "F/s",
"prefix": "G"
}
}
}, },
"resources": [ "rapl_power": {
{ "avg": 228.07,
"hostname": "e0102" "min": 0,
}, "unit": {
{ "base": "W"
"hostname": "e0103" },
}, "max": 258.56
{ },
"hostname": "e0105" "ipc": {
}, "unit": {
{ "base": "IPC"
"hostname": "e0106" },
}, "max": 0.510204081632653,
{ "avg": 1.53846153846154,
"hostname": "e0107" "min": 0.0
}, },
{ "clock": {
"hostname": "e0108" "min": 1380.32,
}, "avg": 2599.39,
{ "unit": {
"hostname": "e0114" "base": "Hz",
}, "prefix": "M"
{ },
"hostname": "e0320" "max": 2634.46
}, },
{ "cpu_load": {
"hostname": "e0321" "avg": 18.4,
}, "min": 0,
{ "max": 23.58,
"hostname": "e0325" "unit": {
}, "base": "load"
{ }
"hostname": "e0404" },
}, "flops_any": {
{ "max": 404.62,
"hostname": "e0415" "unit": {
}, "base": "F/s",
{ "prefix": "G"
"hostname": "e0433" },
}, "avg": 225.59,
{ "min": 0
"hostname": "e0437" },
}, "flops_dp": {
{ "max": 0.24,
"hostname": "e0439" "unit": {
}, "base": "F/s",
{ "prefix": "G"
"hostname": "e0501" },
}, "min": 0,
{ "avg": 0
"hostname": "e0503" },
}, "mem_used": {
{ "min": 1.55,
"hostname": "e0505" "avg": 27.84,
}, "unit": {
{ "base": "B",
"hostname": "e0506" "prefix": "G"
}, },
{ "max": 37.5
"hostname": "e0512" },
}, "flops_sp": {
{ "min": 0,
"hostname": "e0513" "avg": 225.59,
}, "max": 404.62,
{ "unit": {
"hostname": "e0514" "base": "F/s",
}, "prefix": "G"
{ }
"hostname": "e0653" }
}, },
{ "resources": [
"hostname": "e0701" {
}, "hostname": "e0102"
{ },
"hostname": "e0716" {
}, "hostname": "e0103"
{ },
"hostname": "e0727" {
}, "hostname": "e0105"
{ },
"hostname": "e0728" {
}, "hostname": "e0106"
{ },
"hostname": "e0925" {
}, "hostname": "e0107"
{ },
"hostname": "e0926" {
}, "hostname": "e0108"
{ },
"hostname": "e0929" {
}, "hostname": "e0114"
{ },
"hostname": "e0934" {
}, "hostname": "e0320"
{ },
"hostname": "e0951" {
} "hostname": "e0321"
], },
"walltime": 10, {
"jobState": "completed", "hostname": "e0325"
"cluster": "emmy", },
"subCluster": "haswell", {
"stopTime": 1609009562, "hostname": "e0404"
"user": "emmyUser6", },
"startTime": 1608923076, {
"partition": "work", "hostname": "e0415"
"tags": [], },
"project": "no project", {
"numNodes": 32, "hostname": "e0433"
"duration": 86486 },
{
"hostname": "e0437"
},
{
"hostname": "e0439"
},
{
"hostname": "e0501"
},
{
"hostname": "e0503"
},
{
"hostname": "e0505"
},
{
"hostname": "e0506"
},
{
"hostname": "e0512"
},
{
"hostname": "e0513"
},
{
"hostname": "e0514"
},
{
"hostname": "e0653"
},
{
"hostname": "e0701"
},
{
"hostname": "e0716"
},
{
"hostname": "e0727"
},
{
"hostname": "e0728"
},
{
"hostname": "e0925"
},
{
"hostname": "e0926"
},
{
"hostname": "e0929"
},
{
"hostname": "e0934"
},
{
"hostname": "e0951"
}
],
"walltime": 10,
"jobState": "completed",
"cluster": "emmy",
"subCluster": "haswell",
"stopTime": 1609009562,
"user": "emmyUser6",
"startTime": 1608923076,
"partition": "work",
"tags": [],
"project": "no project",
"numNodes": 32,
"duration": 86486
} }

View File

@@ -1,194 +1,194 @@
{ {
"stopTime": 1609387081, "stopTime": 1609387081,
"resources": [ "resources": [
{ {
"hostname": "e0151" "hostname": "e0151"
},
{
"hostname": "e0152"
},
{
"hostname": "e0153"
},
{
"hostname": "e0232"
},
{
"hostname": "e0303"
},
{
"hostname": "e0314"
},
{
"hostname": "e0344"
},
{
"hostname": "e0345"
},
{
"hostname": "e0348"
},
{
"hostname": "e0507"
},
{
"hostname": "e0518"
},
{
"hostname": "e0520"
},
{
"hostname": "e0522"
},
{
"hostname": "e0526"
},
{
"hostname": "e0527"
},
{
"hostname": "e0528"
},
{
"hostname": "e0530"
},
{
"hostname": "e0551"
},
{
"hostname": "e0604"
},
{
"hostname": "e0613"
},
{
"hostname": "e0634"
},
{
"hostname": "e0639"
},
{
"hostname": "e0640"
},
{
"hostname": "e0651"
},
{
"hostname": "e0653"
},
{
"hostname": "e0701"
},
{
"hostname": "e0704"
},
{
"hostname": "e0751"
},
{
"hostname": "e0809"
},
{
"hostname": "e0814"
},
{
"hostname": "e0819"
},
{
"hostname": "e0908"
}
],
"walltime": 10,
"cluster": "emmy",
"subCluster": "haswell",
"jobState": "completed",
"statistics": {
"clock": {
"max": 2634.9,
"unit": {
"base": "Hz",
"prefix": "M"
},
"min": 0,
"avg": 2597.8
},
"cpu_load": {
"max": 27.41,
"min": 0,
"avg": 18.39,
"unit": {
"base": "load"
}
},
"mem_bw": {
"min": 0,
"avg": 63.23,
"unit": {
"base": "B/s",
"prefix": "G"
},
"max": 75.06
},
"ipc": {
"min": 0.0,
"avg": 1.53846153846154,
"unit": {
"base": "IPC"
},
"max": 0.490196078431373
},
"rapl_power": {
"min": 0,
"avg": 227.32,
"unit": {
"base": "W"
},
"max": 256.22
},
"mem_used": {
"min": 1.5,
"avg": 27.77,
"unit": {
"base": "B",
"prefix": "G"
},
"max": 37.43
},
"flops_sp": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"max": 413.21,
"min": 0,
"avg": 224.41
},
"flops_dp": {
"max": 5.72,
"unit": {
"base": "F/s",
"prefix": "G"
},
"min": 0,
"avg": 0
},
"flops_any": {
"min": 0,
"avg": 224.42,
"max": 413.21,
"unit": {
"base": "F/s",
"prefix": "G"
}
}
}, },
"exclusive": 1, {
"jobId": 1404397, "hostname": "e0152"
"tags": [], },
"partition": "work", {
"project": "no project", "hostname": "e0153"
"user": "emmyUser6", },
"startTime": 1609300556, {
"duration": 86525, "hostname": "e0232"
"numNodes": 32 },
{
"hostname": "e0303"
},
{
"hostname": "e0314"
},
{
"hostname": "e0344"
},
{
"hostname": "e0345"
},
{
"hostname": "e0348"
},
{
"hostname": "e0507"
},
{
"hostname": "e0518"
},
{
"hostname": "e0520"
},
{
"hostname": "e0522"
},
{
"hostname": "e0526"
},
{
"hostname": "e0527"
},
{
"hostname": "e0528"
},
{
"hostname": "e0530"
},
{
"hostname": "e0551"
},
{
"hostname": "e0604"
},
{
"hostname": "e0613"
},
{
"hostname": "e0634"
},
{
"hostname": "e0639"
},
{
"hostname": "e0640"
},
{
"hostname": "e0651"
},
{
"hostname": "e0653"
},
{
"hostname": "e0701"
},
{
"hostname": "e0704"
},
{
"hostname": "e0751"
},
{
"hostname": "e0809"
},
{
"hostname": "e0814"
},
{
"hostname": "e0819"
},
{
"hostname": "e0908"
}
],
"walltime": 10,
"cluster": "emmy",
"subCluster": "haswell",
"jobState": "completed",
"statistics": {
"clock": {
"max": 2634.9,
"unit": {
"base": "Hz",
"prefix": "M"
},
"min": 0,
"avg": 2597.8
},
"cpu_load": {
"max": 27.41,
"min": 0,
"avg": 18.39,
"unit": {
"base": "load"
}
},
"mem_bw": {
"min": 0,
"avg": 63.23,
"unit": {
"base": "B/s",
"prefix": "G"
},
"max": 75.06
},
"ipc": {
"min": 0.0,
"avg": 1.53846153846154,
"unit": {
"base": "IPC"
},
"max": 0.490196078431373
},
"rapl_power": {
"min": 0,
"avg": 227.32,
"unit": {
"base": "W"
},
"max": 256.22
},
"mem_used": {
"min": 1.5,
"avg": 27.77,
"unit": {
"base": "B",
"prefix": "G"
},
"max": 37.43
},
"flops_sp": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"max": 413.21,
"min": 0,
"avg": 224.41
},
"flops_dp": {
"max": 5.72,
"unit": {
"base": "F/s",
"prefix": "G"
},
"min": 0,
"avg": 0
},
"flops_any": {
"min": 0,
"avg": 224.42,
"max": 413.21,
"unit": {
"base": "F/s",
"prefix": "G"
}
}
},
"shared": "none",
"jobId": 1404397,
"tags": [],
"partition": "work",
"project": "no project",
"user": "emmyUser6",
"startTime": 1609300556,
"duration": 86525,
"numNodes": 32
} }

View File

@@ -13,5 +13,7 @@ else
cp ./configs/config-demo.json config.json cp ./configs/config-demo.json config.json
./cc-backend -migrate-db ./cc-backend -migrate-db
./cc-backend -server -dev -init-db -add-user demo:admin:demo ./cc-backend -dev -init-db -add-user demo:admin,api:demo
./cc-backend -server -dev
fi fi

View File

@@ -56,7 +56,7 @@
job(id: "${dbid}") { job(id: "${dbid}") {
id, jobId, user, project, cluster, startTime, id, jobId, user, project, cluster, startTime,
duration, numNodes, numHWThreads, numAcc, energy, duration, numNodes, numHWThreads, numAcc, energy,
SMT, exclusive, partition, subCluster, arrayJobId, SMT, shared, partition, subCluster, arrayJobId,
monitoringStatus, state, walltime, monitoringStatus, state, walltime,
tags { id, type, scope, name }, tags { id, type, scope, name },
resources { hostname, hwthreads, accelerators }, resources { hostname, hwthreads, accelerators },
@@ -325,7 +325,7 @@
metricUnit={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.unit} metricUnit={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.unit}
nativeScope={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.scope} nativeScope={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.scope}
presetScopes={item.data.map((x) => x.scope)} presetScopes={item.data.map((x) => x.scope)}
isShared={$initq.data.job.exclusive != 1} isShared={$initq.data.job.shared != "none"}
/> />
{:else if item.disabled == true} {:else if item.disabled == true}
<Card color="info"> <Card color="info">

View File

@@ -69,7 +69,7 @@
hostname hostname
} }
SMT SMT
exclusive shared
partition partition
arrayJobId arrayJobId
monitoringStatus monitoringStatus

View File

@@ -172,7 +172,7 @@
{job.numNodes} {job.numNodes}
{/if} {/if}
<Icon name="pc-horizontal" /> <Icon name="pc-horizontal" />
{#if job.exclusive != 1} {#if job.shared != "none"}
(shared) (shared)
{/if} {/if}
{#if job.numAcc > 0} {#if job.numAcc > 0}

View File

@@ -213,7 +213,7 @@
metric={metric.data.name} metric={metric.data.name}
cluster={cluster.find((c) => c.name == job.cluster)} cluster={cluster.find((c) => c.name == job.cluster)}
subCluster={job.subCluster} subCluster={job.subCluster}
isShared={job.exclusive != 1} isShared={job.shared != "none"}
numhwthreads={job.numHWThreads} numhwthreads={job.numHWThreads}
numaccs={job.numAcc} numaccs={job.numAcc}
zoomState={zoomStates[metric.data.name] || null} zoomState={zoomStates[metric.data.name] || null}

View File

@@ -92,7 +92,7 @@
Missing Metric Missing Metric
</Button> </Button>
</InputGroup> </InputGroup>
{:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].exclusive} {:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].shared == "none"}
<InputGroup> <InputGroup>
<InputGroupText> <InputGroupText>
<Icon name="circle-fill"/> <Icon name="circle-fill"/>
@@ -104,7 +104,7 @@
Exclusive Exclusive
</Button> </Button>
</InputGroup> </InputGroup>
{:else if nodeJobsData.jobs.count >= 1 && !nodeJobsData.jobs.items[0].exclusive} {:else if nodeJobsData.jobs.count >= 1 && !(nodeJobsData.jobs.items[0].shared == "none")}
<InputGroup> <InputGroup>
<InputGroupText> <InputGroupText>
<Icon name="circle-half"/> <Icon name="circle-half"/>

View File

@@ -45,7 +45,7 @@
jobId jobId
user user
project project
exclusive shared
resources { resources {
hostname hostname
accelerators accelerators
@@ -101,7 +101,7 @@
function buildExtendedLegend() { function buildExtendedLegend() {
let pendingExtendedLegendData = null let pendingExtendedLegendData = null
// Build Extended for allocated nodes [Commented: Only Build extended Legend For Shared Nodes] // Build Extended for allocated nodes [Commented: Only Build extended Legend For Shared Nodes]
if ($nodeJobsData.data.jobs.count >= 1) { // "&& !$nodeJobsData.data.jobs.items[0].exclusive)" if ($nodeJobsData.data.jobs.count >= 1) {
const accSet = Array.from(new Set($nodeJobsData.data.jobs.items const accSet = Array.from(new Set($nodeJobsData.data.jobs.items
.map((i) => i.resources .map((i) => i.resources
.filter((r) => (r.hostname === nodeData.host) && r?.accelerators) .filter((r) => (r.hostname === nodeData.host) && r?.accelerators)