Mirror of https://github.com/ClusterCockpit/cc-backend, synced 2026-03-20 14:57:30 +01:00

Merge pull request #405 from ClusterCockpit/metricstore

Metricstore Integration
.github/workflows/test.yml vendored (2 changes)

@@ -7,7 +7,7 @@ jobs:
     - name: Install Go
       uses: actions/setup-go@v4
       with:
-        go-version: 1.24.x
+        go-version: 1.25.x
     - name: Checkout code
       uses: actions/checkout@v3
     - name: Build, Vet & Test

.gitignore vendored (5 changes)

@@ -9,6 +9,11 @@
 /var/*.db
 /var/*.txt
 
+/var/checkpoints*
+
+migrateTimestamps.pl
+test_ccms_write_api.sh
+
 /web/frontend/public/build
 /web/frontend/node_modules
 

@@ -38,7 +38,7 @@ type Job {
   numAcc: Int!
   energy: Float!
   SMT: Int!
-  exclusive: Int!
+  shared: String!
   partition: String!
   arrayJobId: Int!
   monitoringStatus: Int!
@@ -425,7 +425,7 @@ input JobFilter {
   startTime: TimeRange
   state: [JobState!]
   metricStats: [MetricStatItem!]
-  exclusive: Int
+  shared: String
   node: StringInput
 }
 
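Aside: the schema change above retires the three-valued exclusive integer (the old swagger definition below bounds it to 0-2) in favor of a self-describing shared string enum. As an illustration only, a migration helper might look like the following Go sketch; the function name and the assumed meaning of the 0/1/2 values are guesses based on the old bounds, not code from this pull request. The REST payloads later in this diff confirm the enum values ("none", "single_user", "multi_user") but not the numeric mapping, so treat the switch arms as placeholders.

package main

import "fmt"

// mapExclusiveToShared is a hypothetical helper translating the legacy
// integer field to the new string enum. The numeric semantics below are
// assumptions, not taken from this PR.
func mapExclusiveToShared(exclusive int) (string, error) {
	switch exclusive {
	case 1: // assumed: node used exclusively by one job
		return "none", nil
	case 2: // assumed: node shared among jobs of the same user
		return "single_user", nil
	case 0: // assumed: node shared among jobs of multiple users
		return "multi_user", nil
	default:
		return "", fmt.Errorf("invalid exclusive value %d", exclusive)
	}
}

func main() {
	s, err := mapExclusiveToShared(1)
	fmt.Println(s, err) // none <nil>
}
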
@@ -1394,12 +1394,6 @@
             "format": "float64"
           }
         },
-        "exclusive": {
-          "type": "integer",
-          "maximum": 2,
-          "minimum": 0,
-          "example": 1
-        },
         "footprint": {
           "type": "object",
           "additionalProperties": {
@@ -1416,12 +1410,18 @@
         },
         "jobState": {
           "enum": [
-            "completed",
-            "failed",
+            "boot_fail",
             "cancelled",
-            "stopped",
-            "timeout",
-            "out_of_memory"
+            "completed",
+            "deadline",
+            "failed",
+            "node_fail",
+            "out-of-memory",
+            "pending",
+            "preempted",
+            "running",
+            "suspended",
+            "timeout"
           ],
           "allOf": [
             {
@@ -1477,6 +1477,14 @@
             "$ref": "#/definitions/schema.Resource"
           }
         },
+        "shared": {
+          "type": "string",
+          "enum": [
+            "none",
+            "single_user",
+            "multi_user"
+          ]
+        },
         "smt": {
           "type": "integer",
           "example": 4

@@ -207,11 +207,6 @@ definitions:
         format: float64
         type: number
       type: object
-    exclusive:
-      example: 1
-      maximum: 2
-      minimum: 0
-      type: integer
     footprint:
       additionalProperties:
         format: float64
@@ -226,12 +221,18 @@ definitions:
       allOf:
       - $ref: '#/definitions/schema.JobState'
       enum:
-      - completed
-      - failed
+      - boot_fail
      - cancelled
-      - stopped
+      - completed
+      - deadline
+      - failed
+      - node_fail
+      - out-of-memory
+      - pending
+      - preempted
+      - running
+      - suspended
       - timeout
-      - out_of_memory
       example: completed
     metaData:
       additionalProperties:
@@ -269,6 +270,12 @@ definitions:
       items:
         $ref: '#/definitions/schema.Resource'
       type: array
+    shared:
+      enum:
+      - none
+      - single_user
+      - multi_user
+      type: string
     smt:
       example: 4
       type: integer

@@ -18,6 +18,7 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/auth"
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/importer"
+	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
 	"github.com/ClusterCockpit/cc-backend/internal/metricdata"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/internal/tagger"
@@ -96,6 +97,12 @@ func main() {
 		} else {
 			cclog.Abort("Cluster configuration must be present")
 		}
+
+		if mscfg := ccconf.GetPackageConfig("metric-store"); mscfg != nil {
+			config.InitMetricStore(mscfg)
+		} else {
+			cclog.Abort("Metric Store configuration must be present")
+		}
 	} else {
 		cclog.Abort("Main configuration must be present")
 	}
@@ -201,7 +208,7 @@ func main() {
 	if archiveCfg := ccconf.GetPackageConfig("archive"); archiveCfg != nil {
 		err = archive.Init(archiveCfg, config.Keys.DisableArchive)
 	} else {
-		err = archive.Init(json.RawMessage(`{\"kind\":\"file\",\"path\":\"./var/job-archive\"}`), config.Keys.DisableArchive)
+		err = archive.Init(json.RawMessage("{\"kind\":\"file\",\"path\":\"./var/job-archive\"}"), config.Keys.DisableArchive)
 	}
 	if err != nil {
 		cclog.Abortf("Init: Failed to initialize archive.\nError: %s\n", err.Error())
@@ -241,13 +248,18 @@ func main() {
 		cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
 	}
 
+	var wg sync.WaitGroup
+
+	//Metric Store starts after all flags have been processes
+	memorystore.Init(&wg)
+
 	archiver.Start(repository.GetJobRepository())
 
+	// // Comment out
 	taskManager.Start(ccconf.GetPackageConfig("cron"),
 		ccconf.GetPackageConfig("archive"))
-	serverInit()
 
-	var wg sync.WaitGroup
+	serverInit()
 
 	wg.Add(1)
 	go func() {

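Aside: the reordering above moves memorystore.Init(&wg) ahead of serverInit(), and the WaitGroup declared in main now outlives the HTTP server. A minimal sketch of that lifecycle pattern follows; the stop channel stands in for memorystore's internal shutdown signal, which this diff does not show, so treat it as an assumption.

package main

import (
	"fmt"
	"sync"
)

// initSubsystem models the pattern the diff introduces: a component
// registers its background goroutines on a caller-owned WaitGroup so
// main can block until every worker has drained on shutdown.
func initSubsystem(wg *sync.WaitGroup, stop <-chan struct{}) {
	wg.Add(1)
	go func() {
		defer wg.Done()
		<-stop // a checkpoint/archive loop would run here
		fmt.Println("subsystem flushed and stopped")
	}()
}

func main() {
	var wg sync.WaitGroup
	stop := make(chan struct{})

	initSubsystem(&wg, stop) // analogous to memorystore.Init(&wg)

	// ... start HTTP server, serve requests ...

	close(stop) // analogous to memorystore.Shutdown()
	wg.Wait()   // main returns only after all workers exit
}
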
@@ -26,6 +26,7 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/graph"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
+	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
 	"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
 	"github.com/ClusterCockpit/cc-backend/web"
 	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
@@ -118,6 +119,7 @@ func serverInit() {
 	userapi := router.PathPrefix("/userapi").Subrouter()
 	configapi := router.PathPrefix("/config").Subrouter()
 	frontendapi := router.PathPrefix("/frontend").Subrouter()
+	metricstoreapi := router.PathPrefix("/metricstore").Subrouter()
 
 	if !config.Keys.DisableAuthentication {
 		router.Handle("/login", authHandle.Login(
@@ -198,6 +200,14 @@ func serverInit() {
 			onFailureResponse)
 	})
 
+	metricstoreapi.Use(func(next http.Handler) http.Handler {
+		return authHandle.AuthMetricStoreApi(
+			// On success;
+			next,
+			// On failure: JSON Response
+			onFailureResponse)
+	})
+
 	configapi.Use(func(next http.Handler) http.Handler {
 		return authHandle.AuthConfigApi(
 			// On success;
@@ -231,6 +241,7 @@ func serverInit() {
 	routerConfig.SetupRoutes(secured, buildInfo)
 	apiHandle.MountApiRoutes(securedapi)
 	apiHandle.MountUserApiRoutes(userapi)
+	apiHandle.MountMetricStoreApiRoutes(metricstoreapi)
 	apiHandle.MountConfigApiRoutes(configapi)
 	apiHandle.MountFrontendApiRoutes(frontendapi)
 
@@ -325,6 +336,9 @@ func serverShutdown() {
 	// First shut down the server gracefully (waiting for all ongoing requests)
 	server.Shutdown(context.Background())
 
+	//Archive all the metric store data
+	memorystore.Shutdown()
+
 	// Then, wait for any async archivings still pending...
 	archiver.WaitForArchiving()
 }

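Aside: the server changes follow gorilla/mux's usual subrouter-plus-middleware shape: every route mounted on the /metricstore subrouter passes through the auth middleware before the handler runs. A self-contained sketch with a placeholder middleware standing in for authHandle.AuthMetricStoreApi:

package main

import (
	"fmt"
	"net/http"

	"github.com/gorilla/mux"
)

// requireToken is a stand-in for the PR's AuthMetricStoreApi middleware:
// it wraps the subrouter's handlers and rejects requests without a token.
func requireToken(next http.Handler) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		if r.Header.Get("Authorization") == "" {
			http.Error(rw, `{"error":"unauthorized"}`, http.StatusUnauthorized)
			return
		}
		next.ServeHTTP(rw, r)
	})
}

func main() {
	router := mux.NewRouter()
	// Same shape as the diff: a dedicated subrouter under /metricstore
	// whose middleware runs before every route mounted on it.
	metricstoreapi := router.PathPrefix("/metricstore").Subrouter()
	metricstoreapi.Use(requireToken)
	metricstoreapi.HandleFunc("/api/healthcheck", func(rw http.ResponseWriter, r *http.Request) {
		fmt.Fprintln(rw, "ok")
	}).Methods(http.MethodGet)

	http.ListenAndServe(":8080", router)
}
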
@@ -4,11 +4,27 @@
     "short-running-jobs-duration": 300,
     "resampling": {
       "trigger": 30,
-      "resolutions": [600, 300, 120, 60]
+      "resolutions": [
+        600,
+        300,
+        120,
+        60
+      ]
     },
-    "apiAllowedIPs": ["*"],
+    "apiAllowedIPs": [
+      "*"
+    ],
     "emission-constant": 317
   },
+  "cron": {
+    "commit-job-worker": "2m",
+    "duration-worker": "5m",
+    "footprint-worker": "10m"
+  },
+  "archive": {
+    "kind": "file",
+    "path": "./var/job-archive"
+  },
   "auth": {
     "jwts": {
       "max-age": "2000h"
@@ -18,9 +34,7 @@
     {
       "name": "fritz",
       "metricDataRepository": {
-        "kind": "cc-metric-store",
-        "url": "http://localhost:8082",
-        "token": ""
+        "kind": "cc-metric-store"
       },
       "filterRanges": {
         "numNodes": {
@@ -40,9 +54,7 @@
     {
       "name": "alex",
      "metricDataRepository": {
-        "kind": "cc-metric-store",
-        "url": "http://localhost:8082",
-        "token": ""
+        "kind": "cc-metric-store"
       },
       "filterRanges": {
         "numNodes": {
@@ -59,5 +71,18 @@
         }
       }
     }
-  ]
-}
+  ],
+  "metric-store": {
+    "checkpoints": {
+      "file-format": "avro",
+      "interval": "2h",
+      "directory": "./var/checkpoints",
+      "restore": "48h"
+    },
+    "archive": {
+      "interval": "2h",
+      "directory": "./var/archive"
+    },
+    "retention-in-memory": "48h"
+  }
+}

@@ -6,13 +6,29 @@
     "user": "clustercockpit",
     "group": "clustercockpit",
     "validate": false,
-    "apiAllowedIPs": ["*"],
+    "apiAllowedIPs": [
+      "*"
+    ],
     "short-running-jobs-duration": 300,
     "resampling": {
       "trigger": 30,
-      "resolutions": [600, 300, 120, 60]
+      "resolutions": [
+        600,
+        300,
+        120,
+        60
+      ]
     }
   },
+  "cron": {
+    "commit-job-worker": "2m",
+    "duration-worker": "5m",
+    "footprint-worker": "10m"
+  },
+  "archive": {
+    "kind": "file",
+    "path": "./var/job-archive"
+  },
   "clusters": [
     {
       "name": "test",
@@ -37,4 +53,4 @@
       }
     }
   ]
 }

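Aside: the new metric-store block expresses intervals and retention as Go duration strings ("2h", "48h"). A minimal decoding sketch follows; the struct is illustrative and assumed, not the actual cc-backend config type, but the field names mirror the JSON above.

package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// MetricStoreConfig is a reduced, hypothetical model of the "metric-store"
// block shown in the configs above.
type MetricStoreConfig struct {
	RetentionInMemory string `json:"retention-in-memory"`
	Checkpoints       struct {
		FileFormat string `json:"file-format"`
		Interval   string `json:"interval"`
		Directory  string `json:"directory"`
		Restore    string `json:"restore"`
	} `json:"checkpoints"`
}

func main() {
	raw := []byte(`{"retention-in-memory":"48h","checkpoints":{"file-format":"avro","interval":"2h","directory":"./var/checkpoints","restore":"48h"}}`)
	var cfg MetricStoreConfig
	if err := json.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	// Durations arrive as strings and parse with time.ParseDuration.
	retention, err := time.ParseDuration(cfg.RetentionInMemory)
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.Checkpoints.FileFormat, retention) // avro 48h0m0s
}
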
go.mod (28 changes)

@@ -6,10 +6,10 @@ toolchain go1.24.1
 
 require (
 	github.com/99designs/gqlgen v0.17.78
-	github.com/ClusterCockpit/cc-lib v0.7.0
+	github.com/ClusterCockpit/cc-lib v0.8.0
 	github.com/Masterminds/squirrel v1.5.4
 	github.com/coreos/go-oidc/v3 v3.12.0
-	github.com/expr-lang/expr v1.17.5
+	github.com/expr-lang/expr v1.17.6
 	github.com/go-co-op/gocron/v2 v2.16.0
 	github.com/go-ldap/ldap/v3 v3.4.10
 	github.com/go-sql-driver/mysql v1.9.0
@@ -19,19 +19,22 @@ require (
 	github.com/gorilla/handlers v1.5.2
 	github.com/gorilla/mux v1.8.1
 	github.com/gorilla/sessions v1.4.0
+	github.com/influxdata/line-protocol/v2 v2.2.1
 	github.com/jmoiron/sqlx v1.4.0
 	github.com/joho/godotenv v1.5.1
+	github.com/linkedin/goavro/v2 v2.14.0
 	github.com/mattn/go-sqlite3 v1.14.24
-	github.com/prometheus/client_golang v1.23.0
-	github.com/prometheus/common v0.65.0
+	github.com/nats-io/nats.go v1.45.0
+	github.com/prometheus/client_golang v1.23.2
+	github.com/prometheus/common v0.66.1
 	github.com/qustavo/sqlhooks/v2 v2.1.0
 	github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
 	github.com/swaggo/http-swagger v1.3.4
 	github.com/swaggo/swag v1.16.6
 	github.com/vektah/gqlparser/v2 v2.5.30
-	golang.org/x/crypto v0.40.0
+	golang.org/x/crypto v0.41.0
 	golang.org/x/oauth2 v0.30.0
-	golang.org/x/time v0.5.0
+	golang.org/x/time v0.12.0
 )
 
 require (
@@ -51,6 +54,7 @@ require (
 	github.com/go-openapi/spec v0.21.0 // indirect
 	github.com/go-openapi/swag v0.23.1 // indirect
 	github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
+	github.com/golang/snappy v0.0.4 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/gorilla/securecookie v1.1.2 // indirect
 	github.com/gorilla/websocket v1.5.3 // indirect
@@ -61,6 +65,7 @@ require (
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/jpillora/backoff v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/compress v1.18.0 // indirect
 	github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
 	github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
 	github.com/mailru/easyjson v0.9.0 // indirect
@@ -68,6 +73,8 @@ require (
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
+	github.com/nats-io/nkeys v0.4.11 // indirect
+	github.com/nats-io/nuid v1.0.1 // indirect
 	github.com/prometheus/client_model v0.6.2 // indirect
 	github.com/prometheus/procfs v0.16.1 // indirect
 	github.com/robfig/cron/v3 v3.0.1 // indirect
@@ -80,13 +87,12 @@ require (
 	go.yaml.in/yaml/v2 v2.4.2 // indirect
 	golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
 	golang.org/x/mod v0.26.0 // indirect
-	golang.org/x/net v0.42.0 // indirect
+	golang.org/x/net v0.43.0 // indirect
 	golang.org/x/sync v0.16.0 // indirect
-	golang.org/x/sys v0.34.0 // indirect
-	golang.org/x/text v0.27.0 // indirect
+	golang.org/x/sys v0.35.0 // indirect
+	golang.org/x/text v0.28.0 // indirect
 	golang.org/x/tools v0.35.0 // indirect
-	google.golang.org/protobuf v1.36.6 // indirect
-	gopkg.in/yaml.v2 v2.4.0 // indirect
+	google.golang.org/protobuf v1.36.8 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 	sigs.k8s.io/yaml v1.6.0 // indirect
 )

go.sum (79 changes)

@@ -6,16 +6,16 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25
 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
 github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
 github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
-github.com/ClusterCockpit/cc-lib v0.7.0 h1:THuSYrMcn9pSbrMditSI1LMOluq9TnM0/aVId4uK1Hc=
-github.com/ClusterCockpit/cc-lib v0.7.0/go.mod h1:TD1PS8pL2RDvEWaqs8VNejoTSm5OawI9Dcc0CTY/yWQ=
+github.com/ClusterCockpit/cc-lib v0.8.0 h1:kQRMOx30CJCy+Q6TgCK9rarJnJ/CKZPWlIEdIXYlxoA=
+github.com/ClusterCockpit/cc-lib v0.8.0/go.mod h1:5xTwONu9pSp15mJ9CjBKGU9I3Jad8NfhrVHJZl50/yI=
 github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
 github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM=
 github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
 github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
 github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
-github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0=
-github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
+github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
+github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
 github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo=
 github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
 github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
@@ -38,6 +38,7 @@ github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc
 github.com/coreos/go-oidc/v3 v3.12.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0=
 github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
 github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -53,10 +54,14 @@ github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj
 github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
 github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
 github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
-github.com/expr-lang/expr v1.17.5 h1:i1WrMvcdLF249nSNlpQZN1S6NXuW9WaOfF5tPi3aw3k=
-github.com/expr-lang/expr v1.17.5/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
+github.com/expr-lang/expr v1.17.6 h1:1h6i8ONk9cexhDmowO/A64VPxHScu7qfSl2k8OlINec=
+github.com/expr-lang/expr v1.17.6/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
+github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
+github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
+github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
 github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
 github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
 github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl535dDk=
@@ -91,6 +96,11 @@ github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeD
 github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
 github.com/golang-migrate/migrate/v4 v4.18.2 h1:2VSCMz7x7mjyTXx3m2zPokOY82LTRgxK1yQYKo6wWQ8=
 github.com/golang-migrate/migrate/v4 v4.18.2/go.mod h1:2CM6tJvn2kqPXwnXO/d3rAQYiyoIm180VsO8PRX6Rpk=
+github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
+github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
@@ -127,6 +137,11 @@ github.com/influxdata/influxdb-client-go/v2 v2.14.0 h1:AjbBfJuq+QoaXNcrova8smSjw
 github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI=
 github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
 github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
+github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98=
+github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig=
+github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo=
+github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY=
+github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY=
 github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
 github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
 github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
@@ -155,8 +170,11 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
 github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
 github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
 github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw=
@@ -166,6 +184,8 @@ github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6Fm
 github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
 github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/linkedin/goavro/v2 v2.14.0 h1:aNO/js65U+Mwq4yB5f1h01c3wiM458qtRad1DN0CMUI=
+github.com/linkedin/goavro/v2 v2.14.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk=
 github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
 github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
 github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
@@ -187,12 +207,13 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
-github.com/nats-io/nats.go v1.44.0 h1:ECKVrDLdh/kDPV1g0gAQ+2+m2KprqZK5O/eJAyAnH2M=
-github.com/nats-io/nats.go v1.44.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
+github.com/nats-io/nats.go v1.45.0 h1:/wGPbnYXDM0pLKFjZTX+2JOw9TQPoIgTFrUaH97giwA=
+github.com/nats-io/nats.go v1.45.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
 github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0=
 github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE=
 github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
 github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
 github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
 github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@@ -204,12 +225,12 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc=
-github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE=
+github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
+github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
 github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
 github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
-github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE=
-github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
+github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
+github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
 github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
 github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
 github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0=
@@ -233,10 +254,11 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
-github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
-github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
 github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
 github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg=
 github.com/swaggo/http-swagger v1.3.4 h1:q7t/XLx0n15H1Q9/tk3Y9L4n210XzJF5WtnDX64a5ww=
@@ -273,8 +295,8 @@ golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliY
 golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
 golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
 golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
-golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM=
-golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY=
+golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
+golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
 golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
 golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
@@ -295,8 +317,8 @@ golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
 golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
 golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
 golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
-golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
-golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
+golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
+golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
 golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
 golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -319,8 +341,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
-golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
+golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
 golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -339,10 +361,10 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
-golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
-golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
-golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
-golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
+golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
+golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
+golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
+golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
@@ -352,15 +374,16 @@ golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxb
 golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0=
 golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
-google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
+google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
-gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=

@@ -241,7 +241,7 @@ func TestRestApi(t *testing.T) {
 		"numNodes": 1,
 		"numHwthreads": 8,
 		"numAcc": 0,
-		"exclusive": 1,
+		"shared": "none",
 		"monitoringStatus": 1,
 		"smt": 1,
 		"resources": [
@@ -396,7 +396,7 @@ func TestRestApi(t *testing.T) {
 		"partition": "default",
 		"walltime": 3600,
 		"numNodes": 1,
-		"exclusive": 1,
+		"shared": "none",
 		"monitoringStatus": 1,
 		"smt": 1,
 		"resources": [

@@ -1401,12 +1401,6 @@ const docTemplate = `{
             "format": "float64"
           }
         },
-        "exclusive": {
-          "type": "integer",
-          "maximum": 2,
-          "minimum": 0,
-          "example": 1
-        },
         "footprint": {
           "type": "object",
           "additionalProperties": {
@@ -1423,12 +1417,18 @@ const docTemplate = `{
         },
         "jobState": {
           "enum": [
-            "completed",
-            "failed",
+            "boot_fail",
             "cancelled",
-            "stopped",
-            "timeout",
-            "out_of_memory"
+            "completed",
+            "deadline",
+            "failed",
+            "node_fail",
+            "out-of-memory",
+            "pending",
+            "preempted",
+            "running",
+            "suspended",
+            "timeout"
           ],
           "allOf": [
             {
@@ -1484,6 +1484,14 @@ const docTemplate = `{
             "$ref": "#/definitions/schema.Resource"
           }
         },
+        "shared": {
+          "type": "string",
+          "enum": [
+            "none",
+            "single_user",
+            "multi_user"
+          ]
+        },
         "smt": {
           "type": "integer",
           "example": 4

@@ -15,6 +15,7 @@ import (
 
 	"github.com/ClusterCockpit/cc-backend/internal/auth"
 	"github.com/ClusterCockpit/cc-backend/internal/config"
+	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
 	"github.com/ClusterCockpit/cc-lib/schema"
@@ -95,6 +96,19 @@ func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
 	r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
 }
 
+func (api *RestApi) MountMetricStoreApiRoutes(r *mux.Router) {
+	// REST API Uses TokenAuth
+	r.HandleFunc("/api/free", memorystore.HandleFree).Methods(http.MethodPost)
+	r.HandleFunc("/api/write", memorystore.HandleWrite).Methods(http.MethodPost)
+	r.HandleFunc("/api/debug", memorystore.HandleDebug).Methods(http.MethodGet)
+	r.HandleFunc("/api/healthcheck", memorystore.HandleHealthCheck).Methods(http.MethodGet)
+	// Refactor
+	r.HandleFunc("/api/free/", memorystore.HandleFree).Methods(http.MethodPost)
+	r.HandleFunc("/api/write/", memorystore.HandleWrite).Methods(http.MethodPost)
+	r.HandleFunc("/api/debug/", memorystore.HandleDebug).Methods(http.MethodGet)
+	r.HandleFunc("/api/healthcheck/", memorystore.HandleHealthCheck).Methods(http.MethodGet)
+}
+
 func (api *RestApi) MountConfigApiRoutes(r *mux.Router) {
 	r.StrictSlash(true)
 	// Settings Frontend Uses SessionAuth

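Aside: with these routes mounted under the /metricstore prefix (see the server.go hunks above), the full write path becomes /metricstore/api/write, guarded by the JWT middleware. A client sketch follows; the line-protocol payload is an assumption based on the new influxdata/line-protocol/v2 dependency in go.mod, and the host, port, and metric names are placeholders, not values from this PR.

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// One line-protocol sample; the measurement and tag names a
	// deployment accepts depend on its cluster configuration.
	body := bytes.NewBufferString("flops_any,cluster=fritz,hostname=f0101,type=node value=42.0\n")

	req, _ := http.NewRequest(http.MethodPost,
		"http://localhost:8080/metricstore/api/write", body)
	req.Header.Set("Authorization", "Bearer "+"<JWT>") // token with the api role
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
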
@@ -417,6 +417,42 @@ func (auth *Authentication) AuthUserApi(
	})
}

+func (auth *Authentication) AuthMetricStoreApi(
+	onsuccess http.Handler,
+	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
+) http.Handler {
+	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
+		user, err := auth.JwtAuth.AuthViaJWT(rw, r)
+		if err != nil {
+			cclog.Infof("auth metricstore api -> authentication failed: %s", err.Error())
+			onfailure(rw, r, err)
+			return
+		}
+
+		if user != nil {
+			switch {
+			case len(user.Roles) == 1:
+				if user.HasRole(schema.RoleApi) {
+					ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
+					onsuccess.ServeHTTP(rw, r.WithContext(ctx))
+					return
+				}
+			case len(user.Roles) >= 2:
+				if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
+					ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
+					onsuccess.ServeHTTP(rw, r.WithContext(ctx))
+					return
+				}
+			default:
+				cclog.Info("auth metricstore api -> authentication failed: missing role")
+				onfailure(rw, r, errors.New("unauthorized"))
+			}
+		}
+
+		cclog.Info("auth metricstore api -> authentication failed: no auth")
+		onfailure(rw, r, errors.New("unauthorized"))
+	})
+}
+
func (auth *Authentication) AuthConfigApi(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
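A minimal sketch of the onsuccess/onfailure middleware pattern that AuthMetricStoreApi follows. The token check below is a stand-in for the real JWT validation (AuthViaJWT), and the handler body is invented; only the shape of the wrapping matches the code above.

package main

import (
	"errors"
	"fmt"
	"net/http"
	"net/http/httptest"
)

// authWrap mimics the shape of AuthMetricStoreApi: run a check, then either
// forward to onsuccess or report the error through onfailure.
func authWrap(onsuccess http.Handler, onfailure func(http.ResponseWriter, *http.Request, error)) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		if r.Header.Get("X-Auth-Token") == "" { // stand-in for AuthViaJWT
			onfailure(rw, r, errors.New("unauthorized"))
			return
		}
		onsuccess.ServeHTTP(rw, r)
	})
}

func main() {
	ok := http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		rw.WriteHeader(http.StatusOK)
	})
	h := authWrap(ok, func(rw http.ResponseWriter, r *http.Request, err error) {
		http.Error(rw, err.Error(), http.StatusUnauthorized)
	})

	req := httptest.NewRequest(http.MethodGet, "/api/healthcheck", nil)
	rec := httptest.NewRecorder()
	h.ServeHTTP(rec, req) // no token set
	fmt.Println(rec.Code) // 401
}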
475
internal/avro/avroCheckpoint.go
Normal file
@@ -0,0 +1,475 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package avro

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"os"
	"path"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-lib/schema"
	"github.com/linkedin/goavro/v2"
)

var NumWorkers int = 4

var ErrNoNewData error = errors.New("no data in the pool")

func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) {
	levels := make([]*AvroLevel, 0)
	selectors := make([][]string, 0)
	as.root.lock.RLock()
	// Cluster
	for sel1, l1 := range as.root.children {
		l1.lock.RLock()
		// Node
		for sel2, l2 := range l1.children {
			l2.lock.RLock()
			// Frequency
			for sel3, l3 := range l2.children {
				levels = append(levels, l3)
				selectors = append(selectors, []string{sel1, sel2, sel3})
			}
			l2.lock.RUnlock()
		}
		l1.lock.RUnlock()
	}
	as.root.lock.RUnlock()

	type workItem struct {
		level    *AvroLevel
		dir      string
		selector []string
	}

	n, errs := int32(0), int32(0)

	var wg sync.WaitGroup
	wg.Add(NumWorkers)
	work := make(chan workItem, NumWorkers*2)
	for range NumWorkers {
		go func() {
			defer wg.Done()

			for workItem := range work {
				from := getTimestamp(workItem.dir)

				if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil {
					if err == ErrNoNewData {
						continue
					}

					log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error())
					atomic.AddInt32(&errs, 1)
				} else {
					atomic.AddInt32(&n, 1)
				}
			}
		}()
	}

	for i := range len(levels) {
		dir := path.Join(dir, path.Join(selectors[i]...))
		work <- workItem{
			level:    levels[i],
			dir:      dir,
			selector: selectors[i],
		}
	}

	close(work)
	wg.Wait()

	if errs > 0 {
		return int(n), fmt.Errorf("%d errors happened while creating avro checkpoints (%d successes)", errs, n)
	}
	return int(n), nil
}

// getTimestamp returns the timestamp from the directory name
func getTimestamp(dir string) int64 {
	// Extract the resolution and timestamp from the directory name.
	// The existing avro files use epoch timestamps; iterate over all the
	// files in the directory, find the maximum timestamp, and return it.

	resolution := path.Base(dir)
	dir = path.Dir(dir)

	files, err := os.ReadDir(dir)
	if err != nil {
		return 0
	}
	var maxTs int64 = 0

	if len(files) == 0 {
		return 0
	}

	for _, file := range files {
		if file.IsDir() {
			continue
		}
		name := file.Name()

		if len(name) < 5 || !strings.HasSuffix(name, ".avro") || !strings.HasPrefix(name, resolution+"_") {
			continue
		}

		ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
		if err != nil {
			fmt.Printf("error while parsing timestamp: %s\n", err.Error())
			continue
		}

		if ts > maxTs {
			maxTs = ts
		}
	}

	interval, _ := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Interval)
	updateTime := time.Unix(maxTs, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix()

	if updateTime < time.Now().Unix() {
		return 0
	}

	return maxTs
}

func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
	l.lock.Lock()
	defer l.lock.Unlock()

	// fmt.Printf("Checkpointing directory: %s\n", dir)
	// filepath contains the resolution
	int_res, _ := strconv.Atoi(path.Base(dir))

	// find smallest overall timestamp in l.data map and delete it from l.data
	minTs := int64(1<<63 - 1)
	for ts, dat := range l.data {
		if ts < minTs && len(dat) != 0 {
			minTs = ts
		}
	}

	if from == 0 && minTs != int64(1<<63-1) {
		from = minTs
	}

	if from == 0 {
		return ErrNoNewData
	}

	var schema string
	var codec *goavro.Codec
	record_list := make([]map[string]any, 0)

	var f *os.File

	filePath := dir + fmt.Sprintf("_%d.avro", from)

	var err error

	fp_, err_ := os.Stat(filePath)
	if errors.Is(err_, os.ErrNotExist) {
		err = os.MkdirAll(path.Dir(dir), 0o755)
		if err != nil {
			return fmt.Errorf("failed to create directory: %v", err)
		}
	} else if fp_.Size() != 0 {
		f, err = os.Open(filePath)
		if err != nil {
			return fmt.Errorf("failed to open existing avro file: %v", err)
		}

		br := bufio.NewReader(f)

		reader, err := goavro.NewOCFReader(br)
		if err != nil {
			return fmt.Errorf("failed to create OCF reader: %v", err)
		}
		codec = reader.Codec()
		schema = codec.Schema()

		f.Close()
	}

	time_ref := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()

	if dumpAll {
		time_ref = time.Now().Unix()
	}

	// Empty values
	if len(l.data) == 0 {
		// we checkpoint avro files every 60 seconds
		repeat := 60 / int_res

		for range repeat {
			record_list = append(record_list, make(map[string]any))
		}
	}

	readFlag := true

	for ts := range l.data {
		flag := false
		if ts < time_ref {
			data := l.data[ts]

			schema_gen, err := generateSchema(data)
			if err != nil {
				return err
			}

			flag, schema, err = compareSchema(schema, schema_gen)
			if err != nil {
				return fmt.Errorf("failed to compare read and generated schema: %v", err)
			}
			if flag && readFlag && !errors.Is(err_, os.ErrNotExist) {

				f.Close()

				f, err = os.Open(filePath)
				if err != nil {
					return fmt.Errorf("failed to open Avro file: %v", err)
				}

				br := bufio.NewReader(f)

				ocfReader, err := goavro.NewOCFReader(br)
				if err != nil {
					return fmt.Errorf("failed to create OCF reader while changing schema: %v", err)
				}

				for ocfReader.Scan() {
					record, err := ocfReader.Read()
					if err != nil {
						return fmt.Errorf("failed to read record: %v", err)
					}

					record_list = append(record_list, record.(map[string]any))
				}

				f.Close()

				err = os.Remove(filePath)
				if err != nil {
					return fmt.Errorf("failed to delete file: %v", err)
				}

				readFlag = false
			}
			codec, err = goavro.NewCodec(schema)
			if err != nil {
				return fmt.Errorf("failed to create codec after merged schema: %v", err)
			}

			record_list = append(record_list, generateRecord(data))
			delete(l.data, ts)
		}
	}

	if len(record_list) == 0 {
		return ErrNoNewData
	}

	f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
	if err != nil {
		return fmt.Errorf("failed to append new avro file: %v", err)
	}

	// fmt.Printf("Codec : %#v\n", codec)

	writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
		W:               f,
		Codec:           codec,
		CompressionName: goavro.CompressionDeflateLabel,
	})
	if err != nil {
		return fmt.Errorf("failed to create OCF writer: %v", err)
	}

	// Append the new record
	if err := writer.Append(record_list); err != nil {
		return fmt.Errorf("failed to append record: %v", err)
	}

	f.Close()

	return nil
}

func compareSchema(schemaRead, schemaGen string) (bool, string, error) {
	var genSchema, readSchema AvroSchema

	if schemaRead == "" {
		return false, schemaGen, nil
	}

	// Unmarshal the schema strings into AvroSchema structs
	if err := json.Unmarshal([]byte(schemaGen), &genSchema); err != nil {
		return false, "", fmt.Errorf("failed to parse generated schema: %v", err)
	}
	if err := json.Unmarshal([]byte(schemaRead), &readSchema); err != nil {
		return false, "", fmt.Errorf("failed to parse read schema: %v", err)
	}

	sort.Slice(genSchema.Fields, func(i, j int) bool {
		return genSchema.Fields[i].Name < genSchema.Fields[j].Name
	})

	sort.Slice(readSchema.Fields, func(i, j int) bool {
		return readSchema.Fields[i].Name < readSchema.Fields[j].Name
	})

	// Check if schemas are identical
	schemasEqual := true
	if len(genSchema.Fields) <= len(readSchema.Fields) {

		for i := range genSchema.Fields {
			if genSchema.Fields[i].Name != readSchema.Fields[i].Name {
				schemasEqual = false
				break
			}
		}

		// If schemas are identical, return the read schema
		if schemasEqual {
			return false, schemaRead, nil
		}
	}

	// Create a map to hold unique fields from both schemas
	fieldMap := make(map[string]AvroField)

	// Add fields from the read schema
	for _, field := range readSchema.Fields {
		fieldMap[field.Name] = field
	}

	// Add or update fields from the generated schema
	for _, field := range genSchema.Fields {
		fieldMap[field.Name] = field
	}

	// Create a union schema by collecting fields from the map
	var mergedFields []AvroField
	for _, field := range fieldMap {
		mergedFields = append(mergedFields, field)
	}

	// Sort fields by name for consistency
	sort.Slice(mergedFields, func(i, j int) bool {
		return mergedFields[i].Name < mergedFields[j].Name
	})

	// Create the merged schema
	mergedSchema := AvroSchema{
		Type:   "record",
		Name:   genSchema.Name,
		Fields: mergedFields,
	}

	// Check if schemas are identical
	schemasEqual = len(mergedSchema.Fields) == len(readSchema.Fields)
	if schemasEqual {
		for i := range mergedSchema.Fields {
			if mergedSchema.Fields[i].Name != readSchema.Fields[i].Name {
				schemasEqual = false
				break
			}
		}

		if schemasEqual {
			return false, schemaRead, nil
		}
	}

	// Marshal the merged schema back to JSON
	mergedSchemaJson, err := json.Marshal(mergedSchema)
	if err != nil {
		return false, "", fmt.Errorf("failed to marshal merged schema: %v", err)
	}

	return true, string(mergedSchemaJson), nil
}

func generateSchema(data map[string]schema.Float) (string, error) {
	// Define the Avro schema structure
	schema := map[string]any{
		"type":   "record",
		"name":   "DataRecord",
		"fields": []map[string]any{},
	}

	fieldTracker := make(map[string]struct{})

	for key := range data {
		if _, exists := fieldTracker[key]; !exists {
			key = correctKey(key)

			field := map[string]any{
				"name":    key,
				"type":    "double",
				"default": -1.0,
			}
			schema["fields"] = append(schema["fields"].([]map[string]any), field)
			fieldTracker[key] = struct{}{}
		}
	}

	schemaString, err := json.Marshal(schema)
	if err != nil {
		return "", fmt.Errorf("failed to marshal schema: %v", err)
	}

	return string(schemaString), nil
}

func generateRecord(data map[string]schema.Float) map[string]any {
	record := make(map[string]any)

	// Iterate through each map in data
	for key, value := range data {
		key = correctKey(key)

		// Set the value in the record
		// avro only accepts basic types
		record[key] = value.Double()
	}

	return record
}

func correctKey(key string) string {
	// Replace any invalid characters in the key
	// For example, replace spaces with underscores
	key = strings.ReplaceAll(key, ":", "___")
	key = strings.ReplaceAll(key, ".", "__")

	return key
}

func ReplaceKey(key string) string {
	// Replace any invalid characters in the key
	// For example, replace spaces with underscores
	key = strings.ReplaceAll(key, "___", ":")
	key = strings.ReplaceAll(key, "__", ".")

	return key
}
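Since Avro field names may not contain ':' or '.', correctKey and ReplaceKey above encode keys before writing and decode them when reading. A standalone round-trip sketch; the metric key is made up:

package main

import (
	"fmt"
	"strings"
)

func encode(key string) string { // mirrors correctKey
	key = strings.ReplaceAll(key, ":", "___")
	return strings.ReplaceAll(key, ".", "__")
}

func decode(key string) string { // mirrors ReplaceKey
	key = strings.ReplaceAll(key, "___", ":")
	return strings.ReplaceAll(key, "__", ".")
}

func main() {
	k := "mem.bandwidth:avg" // hypothetical metric key
	enc := encode(k)
	fmt.Println(enc)         // mem__bandwidth___avg
	fmt.Println(decode(enc)) // mem.bandwidth:avg
	// Note: decoding is only unambiguous if original keys never contain
	// consecutive underscores themselves.
}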
84
internal/avro/avroHelper.go
Normal file
@@ -0,0 +1,84 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package avro

import (
	"context"
	"log"
	"slices"
	"strconv"
	"sync"

	"github.com/ClusterCockpit/cc-backend/internal/config"
)

func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
	// AvroPool is a pool of Avro writers.
	go func() {
		if config.MetricStoreKeys.Checkpoints.FileFormat == "json" {
			wg.Done() // Mark this goroutine as done
			return    // Exit the goroutine
		}

		defer wg.Done()

		var avroLevel *AvroLevel
		oldSelector := make([]string, 0)

		for {
			select {
			case <-ctx.Done():
				return
			case val := <-LineProtocolMessages:
				// Fetch the frequency of the metric from the global configuration
				freq, err := config.GetMetricFrequency(val.MetricName)
				if err != nil {
					log.Printf("Error fetching metric frequency: %s\n", err)
					continue
				}

				metricName := ""

				for _, selector_name := range val.Selector {
					metricName += selector_name + Delimiter
				}

				metricName += val.MetricName

				// Create a new selector for the Avro level
				// The selector is a slice of strings that represents the path to the
				// Avro level. It is created by appending the cluster, node, and metric
				// name to the selector.
				var selector []string
				selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))

				if !testEq(oldSelector, selector) {
					// Get the Avro level for the metric
					avroLevel = avroStore.root.findAvroLevelOrCreate(selector)

					// If the Avro level is nil, create a new one
					if avroLevel == nil {
						log.Printf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
					}
					oldSelector = slices.Clone(selector)
				}

				avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq))
			}
		}
	}()
}

func testEq(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
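How a sample reaches DataStaging: some ingest path sends an AvroStruct on the LineProtocolMessages channel. A hypothetical producer sketch; it assumes it runs inside cc-backend (the internal/avro package is only importable from within the module), and all values are invented:

package ingest

import (
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/avro"
	"github.com/ClusterCockpit/cc-lib/schema"
)

func publishSample() {
	// The channel is unbuffered, so this blocks until DataStaging receives.
	avro.LineProtocolMessages <- &avro.AvroStruct{
		MetricName: "cpu_load",
		Cluster:    "fritz",
		Node:       "f0720",
		Selector:   []string{"fritz", "f0720"},
		Value:      schema.Float(42.0),
		Timestamp:  time.Now().Unix(),
	}
}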
167
internal/avro/avroStruct.go
Normal file
@@ -0,0 +1,167 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package avro

import (
	"sync"

	"github.com/ClusterCockpit/cc-lib/schema"
)

var (
	LineProtocolMessages = make(chan *AvroStruct)
	Delimiter            = "ZZZZZ"
)

// CheckpointBufferMinutes should always be in minutes.
// It controls the amount of data to hold for a given amount of time.
var CheckpointBufferMinutes = 3

type AvroStruct struct {
	MetricName string
	Cluster    string
	Node       string
	Selector   []string
	Value      schema.Float
	Timestamp  int64
}

type AvroStore struct {
	root AvroLevel
}

var avroStore AvroStore

type AvroLevel struct {
	children map[string]*AvroLevel
	data     map[int64]map[string]schema.Float
	lock     sync.RWMutex
}

type AvroField struct {
	Name    string `json:"name"`
	Type    any    `json:"type"`
	Default any    `json:"default,omitempty"`
}

type AvroSchema struct {
	Type   string      `json:"type"`
	Name   string      `json:"name"`
	Fields []AvroField `json:"fields"`
}

func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel {
	if len(selector) == 0 {
		return l
	}

	// Allow concurrent reads:
	l.lock.RLock()
	var child *AvroLevel
	var ok bool
	if l.children == nil {
		// Children map needs to be created...
		l.lock.RUnlock()
	} else {
		child, ok := l.children[selector[0]]
		l.lock.RUnlock()
		if ok {
			return child.findAvroLevelOrCreate(selector[1:])
		}
	}

	// The level does not exist, take write lock for unique access:
	l.lock.Lock()
	// While this thread waited for the write lock, another thread
	// could have created the child node.
	if l.children != nil {
		child, ok = l.children[selector[0]]
		if ok {
			l.lock.Unlock()
			return child.findAvroLevelOrCreate(selector[1:])
		}
	}

	child = &AvroLevel{
		data:     make(map[int64]map[string]schema.Float, 0),
		children: nil,
	}

	if l.children != nil {
		l.children[selector[0]] = child
	} else {
		l.children = map[string]*AvroLevel{selector[0]: child}
	}
	l.lock.Unlock()
	return child.findAvroLevelOrCreate(selector[1:])
}

func (l *AvroLevel) addMetric(metricName string, value schema.Float, timestamp int64, Freq int) {
	l.lock.Lock()
	defer l.lock.Unlock()

	KeyCounter := int(CheckpointBufferMinutes * 60 / Freq)

	// Create keys in advance for the given amount of time
	if len(l.data) != KeyCounter {
		if len(l.data) == 0 {
			for i := range KeyCounter {
				l.data[timestamp+int64(i*Freq)] = make(map[string]schema.Float, 0)
			}
		} else {
			// Get the last timestamp
			var lastTs int64
			for ts := range l.data {
				if ts > lastTs {
					lastTs = ts
				}
			}
			// Create keys for the next KeyCounter timestamps
			l.data[lastTs+int64(Freq)] = make(map[string]schema.Float, 0)
		}
	}

	closestTs := int64(0)
	minDiff := int64(Freq) + 1 // Start with diff just outside the valid range
	found := false

	// Iterate over timestamps and choose the one which is within range.
	// Since it's epoch time, we check if the difference is less than 60 seconds.
	for ts, dat := range l.data {
		// Check if timestamp is within range
		diff := timestamp - ts
		if diff < -int64(Freq) || diff > int64(Freq) {
			continue
		}

		// Metric already present at this timestamp — skip
		if _, ok := dat[metricName]; ok {
			continue
		}

		// Check if this is the closest timestamp so far
		if Abs(diff) < minDiff {
			minDiff = Abs(diff)
			closestTs = ts
			found = true
		}
	}

	if found {
		l.data[closestTs][metricName] = value
	}
}

func GetAvroStore() *AvroStore {
	return &avroStore
}

// Abs returns the absolute value of x.
func Abs(x int64) int64 {
	if x < 0 {
		return -x
	}
	return x
}
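A worked example of the bucket selection in addMetric above, with assumed values Freq = 60s and CheckpointBufferMinutes = 3 (so KeyCounter = 3*60/60 = 3 buckets are pre-created):

package main

import "fmt"

func main() {
	freq := int64(60)
	buckets := []int64{0, 60, 120} // pre-created keys, as offsets from t0

	sample := int64(70) // sample timestamp, offset from t0
	best, minDiff := int64(-1), freq+1
	for _, ts := range buckets {
		diff := sample - ts
		if diff < 0 {
			diff = -diff
		}
		// within ±Freq and closer than anything seen so far
		if diff <= freq && diff < minDiff {
			minDiff, best = diff, ts
		}
	}
	fmt.Println(best) // 60: a sample stamped t0+70 lands in the t0+60 bucket
}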
@@ -162,7 +162,7 @@ func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) {
		cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
	}

-	if Clusters == nil || len(Clusters) < 1 {
+	if len(Clusters) < 1 {
		cclog.Abort("Config Init: At least one cluster required in config. Exited with error.")
	}
}
128
internal/config/memorystore.go
Normal file
@@ -0,0 +1,128 @@
package config

import (
	"bytes"
	"encoding/json"
	"fmt"

	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)

// --------------------
// Metric Store config
// --------------------
type MetricStoreConfig struct {
	Checkpoints struct {
		FileFormat string `json:"file-format"`
		Interval   string `json:"interval"`
		RootDir    string `json:"directory"`
		Restore    string `json:"restore"`
	} `json:"checkpoints"`
	Debug struct {
		DumpToFile string `json:"dump-to-file"`
		EnableGops bool   `json:"gops"`
	} `json:"debug"`
	RetentionInMemory string `json:"retention-in-memory"`
	Archive           struct {
		Interval      string `json:"interval"`
		RootDir       string `json:"directory"`
		DeleteInstead bool   `json:"delete-instead"`
	} `json:"archive"`
	Nats []*NatsConfig `json:"nats"`
}

type NatsConfig struct {
	// Address of the nats server
	Address string `json:"address"`

	// Username/Password, optional
	Username string `json:"username"`
	Password string `json:"password"`

	// Creds file path
	Credsfilepath string `json:"creds-file-path"`

	Subscriptions []struct {
		// Channel name
		SubscribeTo string `json:"subscribe-to"`

		// Allow lines without a cluster tag, use this as default, optional
		ClusterTag string `json:"cluster-tag"`
	} `json:"subscriptions"`
}

var MetricStoreKeys MetricStoreConfig

// For aggregation over multiple values at different cpus/sockets/..., not time!
type AggregationStrategy int

const (
	NoAggregation AggregationStrategy = iota
	SumAggregation
	AvgAggregation
)

func AssignAggregationStratergy(str string) (AggregationStrategy, error) {
	switch str {
	case "":
		return NoAggregation, nil
	case "sum":
		return SumAggregation, nil
	case "avg":
		return AvgAggregation, nil
	default:
		return NoAggregation, fmt.Errorf("[METRICSTORE]> unknown aggregation strategy: %s", str)
	}
}

type MetricConfig struct {
	// Interval in seconds at which measurements will arrive.
	Frequency int64

	// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
	Aggregation AggregationStrategy

	// Private, used internally...
	Offset int
}

var Metrics map[string]MetricConfig

func InitMetricStore(msConfig json.RawMessage) {
	// Validate(msConfigSchema, msConfig)
	dec := json.NewDecoder(bytes.NewReader(msConfig))
	dec.DisallowUnknownFields()
	if err := dec.Decode(&MetricStoreKeys); err != nil {
		cclog.Abortf("[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\nError: %s\n", msConfig, err.Error())
	}
}

func GetMetricFrequency(metricName string) (int64, error) {
	if metric, ok := Metrics[metricName]; ok {
		return metric.Frequency, nil
	}
	return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
}

// add logic to add metrics. Redundant metrics should be updated with max frequency.
// use metric.Name to check if the metric already exists.
// if not, add it to the Metrics map.
func AddMetric(name string, metric MetricConfig) error {

	if Metrics == nil {
		Metrics = make(map[string]MetricConfig, 0)
	}

	if existingMetric, ok := Metrics[name]; ok {
		if existingMetric.Frequency != metric.Frequency {
			if existingMetric.Frequency < metric.Frequency {
				existingMetric.Frequency = metric.Frequency
				Metrics[name] = existingMetric
			}
		}
	} else {
		Metrics[name] = metric
	}

	return nil
}
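For reference, a config fragment that would decode into MetricStoreConfig above. Every value here is an illustrative assumption, not a documented default; only the key names come from the struct tags. The sketch assumes it runs inside cc-backend, where internal/config is importable.

package main

import (
	"encoding/json"

	"github.com/ClusterCockpit/cc-backend/internal/config"
)

// Hypothetical values; InitMetricStore rejects unknown keys, so only
// fields declared in MetricStoreConfig appear here.
const exampleMetricStoreConfig = `{
  "checkpoints": {
    "file-format": "avro",
    "interval": "1h",
    "directory": "./var/checkpoints",
    "restore": "48h"
  },
  "retention-in-memory": "48h",
  "archive": {
    "interval": "24h",
    "directory": "./var/archive",
    "delete-instead": false
  }
}`

func main() {
	config.InitMetricStore(json.RawMessage(exampleMetricStoreConfig))
}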
@@ -144,7 +144,7 @@ var clustersSchema = `
				"type": "string"
			}
		},
-		"required": ["kind", "url"]
+		"required": ["kind"]
	},
	"filterRanges": {
		"description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
@@ -118,7 +118,6 @@ type ComplexityRoot struct {
		Duration         func(childComplexity int) int
		Energy           func(childComplexity int) int
		EnergyFootprint  func(childComplexity int) int
-		Exclusive        func(childComplexity int) int
		Footprint        func(childComplexity int) int
		ID               func(childComplexity int) int
		JobID            func(childComplexity int) int
@@ -131,6 +130,7 @@ type ComplexityRoot struct {
		Project          func(childComplexity int) int
		Resources        func(childComplexity int) int
		SMT              func(childComplexity int) int
+		Shared           func(childComplexity int) int
		StartTime        func(childComplexity int) int
		State            func(childComplexity int) int
		SubCluster       func(childComplexity int) int
@@ -427,8 +427,6 @@ type ClusterResolver interface {
type JobResolver interface {
	StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error)
-
-	Exclusive(ctx context.Context, obj *schema.Job) (int, error)
	Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)

	ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error)
@@ -729,13 +727,6 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
		return e.complexity.Job.EnergyFootprint(childComplexity), true

-	case "Job.exclusive":
-		if e.complexity.Job.Exclusive == nil {
-			break
-		}
-
-		return e.complexity.Job.Exclusive(childComplexity), true
-
	case "Job.footprint":
		if e.complexity.Job.Footprint == nil {
			break
@@ -820,6 +811,13 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
		return e.complexity.Job.SMT(childComplexity), true

+	case "Job.shared":
+		if e.complexity.Job.Shared == nil {
+			break
+		}
+
+		return e.complexity.Job.Shared(childComplexity), true
+
	case "Job.startTime":
		if e.complexity.Job.StartTime == nil {
			break
@@ -2379,7 +2377,7 @@ type Job {
	numAcc: Int!
	energy: Float!
	SMT: Int!
-	exclusive: Int!
+	shared: String!
	partition: String!
	arrayJobId: Int!
	monitoringStatus: Int!
@@ -2766,7 +2764,7 @@ input JobFilter {
	startTime: TimeRange
	state: [JobState!]
	metricStats: [MetricStatItem!]
-	exclusive: Int
+	shared: String
	node: StringInput
}

@@ -5241,8 +5239,8 @@ func (ec *executionContext) fieldContext_Job_SMT(_ context.Context, field graphq
	return fc, nil
}

-func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
-	fc, err := ec.fieldContext_Job_exclusive(ctx, field)
+func (ec *executionContext) _Job_shared(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
+	fc, err := ec.fieldContext_Job_shared(ctx, field)
	if err != nil {
		return graphql.Null
	}
@@ -5255,7 +5253,7 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co
	}()
	resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
		ctx = rctx // use context from middleware stack in children
-		return ec.resolvers.Job().Exclusive(rctx, obj)
+		return obj.Shared, nil
	})
	if err != nil {
		ec.Error(ctx, err)
@@ -5267,19 +5265,19 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co
	}
		return graphql.Null
	}
-	res := resTmp.(int)
+	res := resTmp.(string)
	fc.Result = res
-	return ec.marshalNInt2int(ctx, field.Selections, res)
+	return ec.marshalNString2string(ctx, field.Selections, res)
}

-func (ec *executionContext) fieldContext_Job_exclusive(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
+func (ec *executionContext) fieldContext_Job_shared(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	fc = &graphql.FieldContext{
		Object: "Job",
		Field:  field,
-		IsMethod:   true,
-		IsResolver: true,
+		IsMethod:   false,
+		IsResolver: false,
		Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
-			return nil, errors.New("field of type Int does not have child fields")
+			return nil, errors.New("field of type String does not have child fields")
		},
	}
	return fc, nil
@@ -6428,8 +6426,8 @@ func (ec *executionContext) fieldContext_JobResultList_items(_ context.Context,
			return ec.fieldContext_Job_energy(ctx, field)
		case "SMT":
			return ec.fieldContext_Job_SMT(ctx, field)
-		case "exclusive":
-			return ec.fieldContext_Job_exclusive(ctx, field)
+		case "shared":
+			return ec.fieldContext_Job_shared(ctx, field)
		case "partition":
			return ec.fieldContext_Job_partition(ctx, field)
		case "arrayJobId":
@@ -11158,8 +11156,8 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
			return ec.fieldContext_Job_energy(ctx, field)
		case "SMT":
			return ec.fieldContext_Job_SMT(ctx, field)
-		case "exclusive":
-			return ec.fieldContext_Job_exclusive(ctx, field)
+		case "shared":
+			return ec.fieldContext_Job_shared(ctx, field)
		case "partition":
			return ec.fieldContext_Job_partition(ctx, field)
		case "arrayJobId":
@@ -16475,7 +16473,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any
		asMap[k] = v
	}

-	fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "exclusive", "node"}
+	fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "shared", "node"}
	for _, k := range fieldsInOrder {
		v, ok := asMap[k]
		if !ok {
@@ -16608,13 +16606,13 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any
				return it, err
			}
			it.MetricStats = data
-		case "exclusive":
-			ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("exclusive"))
-			data, err := ec.unmarshalOInt2ᚖint(ctx, v)
+		case "shared":
+			ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("shared"))
+			data, err := ec.unmarshalOString2ᚖstring(ctx, v)
			if err != nil {
				return it, err
			}
-			it.Exclusive = data
+			it.Shared = data
		case "node":
			ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("node"))
			data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
@@ -17522,42 +17520,11 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
			if out.Values[i] == graphql.Null {
				atomic.AddUint32(&out.Invalids, 1)
			}
-		case "exclusive":
-			field := field
-
-			innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
-				defer func() {
-					if r := recover(); r != nil {
-						ec.Error(ctx, ec.Recover(ctx, r))
-					}
-				}()
-				res = ec._Job_exclusive(ctx, field, obj)
-				if res == graphql.Null {
-					atomic.AddUint32(&fs.Invalids, 1)
-				}
-				return res
-			}
-
-			if field.Deferrable != nil {
-				dfs, ok := deferred[field.Deferrable.Label]
-				di := 0
-				if ok {
-					dfs.AddField(field)
-					di = len(dfs.Values) - 1
-				} else {
-					dfs = graphql.NewFieldSet([]graphql.CollectedField{field})
-					deferred[field.Deferrable.Label] = dfs
-				}
-				dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler {
-					return innerFunc(ctx, dfs)
-				})
-
-				// don't run the out.Concurrently() call below
-				out.Values[i] = graphql.Null
-				continue
-			}
-
-			out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
+		case "shared":
+			out.Values[i] = ec._Job_shared(ctx, field, obj)
+			if out.Values[i] == graphql.Null {
+				atomic.AddUint32(&out.Invalids, 1)
+			}
		case "partition":
			out.Values[i] = ec._Job_partition(ctx, field, obj)
			if out.Values[i] == graphql.Null {
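On the API surface this means clients select shared (a String) where they previously selected exclusive (an Int). A query sketch follows; the field names match the updated schema in this diff, while the job id and the exact argument form of job() are assumptions.

package main

import "fmt"

const jobQuery = `
query {
  job(id: "1337") {
    jobId
    shared
    partition
  }
}`

func main() { fmt.Println(jobQuery) }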
@@ -69,7 +69,7 @@ type JobFilter struct {
	StartTime   *config.TimeRange `json:"startTime,omitempty"`
	State       []schema.JobState `json:"state,omitempty"`
	MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
-	Exclusive   *int              `json:"exclusive,omitempty"`
+	Shared      *string           `json:"shared,omitempty"`
	Node        *StringInput      `json:"node,omitempty"`
}
@@ -35,11 +35,6 @@ func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Tim
	return &timestamp, nil
}

-// Exclusive is the resolver for the exclusive field.
-func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) {
-	panic(fmt.Errorf("not implemented: Exclusive - exclusive"))
-}
-
// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
	return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID)
@@ -859,3 +854,15 @@ type mutationResolver struct{ *Resolver }
type nodeResolver struct{ *Resolver }
type queryResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver }
+
+// !!! WARNING !!!
+// The code below was going to be deleted when updating resolvers. It has been copied here so you have
+// one last chance to move it out of harm's way if you want. There are two reasons this happens:
+//   - When renaming or deleting a resolver the old code will be put in here. You can safely delete
+//     it when you're done.
+//   - You have helper methods in this file. Move them out to keep these resolver files clean.
+/*
+	func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) {
+		panic(fmt.Errorf("not implemented: Exclusive - exclusive"))
+	}
+*/
@@ -1 +1 @@
-{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"exclusive":1,"monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}
+{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"shared":"none","monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}
@@ -1 +1 @@
-{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"exclusive":1,"jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}
+{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"shared":"none","jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}
419
internal/memorystore/api.go
Normal file
419
internal/memorystore/api.go
Normal file
@@ -0,0 +1,419 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package memorystore

import (
    "bufio"
    "encoding/json"
    "errors"
    "fmt"
    "io"
    "log"
    "math"
    "net/http"
    "strconv"
    "strings"

    "github.com/ClusterCockpit/cc-lib/schema"
    "github.com/ClusterCockpit/cc-lib/util"

    "github.com/influxdata/line-protocol/v2/lineprotocol"
)

// @title cc-metric-store REST API
// @version 1.0.0
// @description API for cc-metric-store

// @contact.name ClusterCockpit Project
// @contact.url https://clustercockpit.org
// @contact.email support@clustercockpit.org

// @license.name MIT License
// @license.url https://opensource.org/licenses/MIT

// @host localhost:8082
// @basePath /api/

// @securityDefinitions.apikey ApiKeyAuth
// @in header
// @name X-Auth-Token

// ErrorResponse model
type ErrorResponse struct {
    // Status text of the HTTP error code
    Status string `json:"status"`
    Error  string `json:"error"` // Error Message
}

type ApiMetricData struct {
    Error      *string           `json:"error,omitempty"`
    Data       schema.FloatArray `json:"data,omitempty"`
    From       int64             `json:"from"`
    To         int64             `json:"to"`
    Resolution int64             `json:"resolution"`
    Avg        schema.Float      `json:"avg"`
    Min        schema.Float      `json:"min"`
    Max        schema.Float      `json:"max"`
}

func handleError(err error, statusCode int, rw http.ResponseWriter) {
    // log.Warnf("REST ERROR : %s", err.Error())
    rw.Header().Add("Content-Type", "application/json")
    rw.WriteHeader(statusCode)
    json.NewEncoder(rw).Encode(ErrorResponse{
        Status: http.StatusText(statusCode),
        Error:  err.Error(),
    })
}

// TODO: Optimize this, just like the stats endpoint!
func (data *ApiMetricData) AddStats() {
    n := 0
    sum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64
    for _, x := range data.Data {
        if x.IsNaN() {
            continue
        }

        n += 1
        sum += float64(x)
        min = math.Min(min, float64(x))
        max = math.Max(max, float64(x))
    }

    if n > 0 {
        avg := sum / float64(n)
        data.Avg = schema.Float(avg)
        data.Min = schema.Float(min)
        data.Max = schema.Float(max)
    } else {
        data.Avg, data.Min, data.Max = schema.NaN, schema.NaN, schema.NaN
    }
}

func (data *ApiMetricData) ScaleBy(f schema.Float) {
    if f == 0 || f == 1 {
        return
    }

    data.Avg *= f
    data.Min *= f
    data.Max *= f
    for i := 0; i < len(data.Data); i++ {
        data.Data[i] *= f
    }
}

func (data *ApiMetricData) PadDataWithNull(ms *MemoryStore, from, to int64, metric string) {
    minfo, ok := ms.Metrics[metric]
    if !ok {
        return
    }

    if (data.From / minfo.Frequency) > (from / minfo.Frequency) {
        padfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency))
        ndata := make([]schema.Float, 0, padfront+len(data.Data))
        for i := 0; i < padfront; i++ {
            ndata = append(ndata, schema.NaN)
        }
        for j := 0; j < len(data.Data); j++ {
            ndata = append(ndata, data.Data[j])
        }
        data.Data = ndata
    }
}

// handleFree godoc
// @summary Free buffers
// @tags free
// @description This endpoint allows users to free buffers in the metric
// store. Buffers can be removed systematically, and the data below a node
// can be pruned selectively if the node itself should not be removed.
// @produce json
// @param to query string true "up to timestamp"
// @success 200 {string} string "ok"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /free/ [post]
func HandleFree(rw http.ResponseWriter, r *http.Request) {
    rawTo := r.URL.Query().Get("to")
    if rawTo == "" {
        handleError(errors.New("'to' is a required query parameter"), http.StatusBadRequest, rw)
        return
    }

    to, err := strconv.ParseInt(rawTo, 10, 64)
    if err != nil {
        // A malformed 'to' value is a client error, not a server error.
        handleError(err, http.StatusBadRequest, rw)
        return
    }

    // // TODO: lastCheckpoint might be modified by different go-routines.
    // // Load it using the sync/atomic package?
    // freeUpTo := lastCheckpoint.Unix()
    // if to < freeUpTo {
    //     freeUpTo = to
    // }

    bodyDec := json.NewDecoder(r.Body)
    var selectors [][]string
    err = bodyDec.Decode(&selectors)
    if err != nil {
        http.Error(rw, err.Error(), http.StatusBadRequest)
        return
    }

    ms := GetMemoryStore()
    n := 0
    for _, sel := range selectors {
        bn, err := ms.Free(sel, to)
        if err != nil {
            handleError(err, http.StatusInternalServerError, rw)
            return
        }

        n += bn
    }

    rw.WriteHeader(http.StatusOK)
    fmt.Fprintf(rw, "buffers freed: %d\n", n)
}
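
To illustrate the request shape HandleFree expects (a JSON array of selector paths in the body, plus the mandatory `to` parameter), here is a minimal client sketch; host, port, token and selector values are assumptions for illustration, not part of this commit:

package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

func main() {
    // Free all buffers up to the given Unix timestamp for one host of a
    // hypothetical cluster "fritz".
    body := bytes.NewBufferString(`[["fritz", "f0649"]]`)
    req, _ := http.NewRequest(http.MethodPost,
        "http://localhost:8082/api/free/?to=1675954353", body)
    req.Header.Set("X-Auth-Token", "<token>") // placeholder credential

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    msg, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(msg)) // expect: 200 OK, "buffers freed: N"
}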

// handleWrite godoc
// @summary Receive metrics in InfluxDB line-protocol
// @tags write
// @description Write data to the in-memory store in the InfluxDB line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)

// @accept plain
// @produce json
// @param cluster query string false "If the lines in the body do not have a cluster tag, use this value instead."
// @success 200 {string} string "ok"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /write/ [post]
func HandleWrite(rw http.ResponseWriter, r *http.Request) {
    bytes, err := io.ReadAll(r.Body)
    rw.Header().Add("Content-Type", "application/json")
    if err != nil {
        handleError(err, http.StatusInternalServerError, rw)
        return
    }

    ms := GetMemoryStore()
    dec := lineprotocol.NewDecoderWithBytes(bytes)
    if err := decodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil {
        log.Printf("/api/write error: %s", err.Error())
        handleError(err, http.StatusBadRequest, rw)
        return
    }
    rw.WriteHeader(http.StatusOK)
}
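
As a sketch of a write request, assuming the alternative line-protocol layout linked above (metric name as measurement, a value field, and the cluster supplied via the query parameter when the lines carry no cluster tag); all names and values are made up:

package main

import (
    "bytes"
    "net/http"
)

func main() {
    // One sample for metric "flops_any" on host "f0649".
    lines := "flops_any,hostname=f0649,type=node value=42.0 1675954353\n"
    req, _ := http.NewRequest(http.MethodPost,
        "http://localhost:8082/api/write/?cluster=fritz",
        bytes.NewBufferString(lines))
    req.Header.Set("X-Auth-Token", "<token>") // placeholder credential

    if _, err := http.DefaultClient.Do(req); err != nil {
        panic(err)
    }
}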

type ApiQueryRequest struct {
    Cluster     string     `json:"cluster"`
    Queries     []ApiQuery `json:"queries"`
    ForAllNodes []string   `json:"for-all-nodes"`
    From        int64      `json:"from"`
    To          int64      `json:"to"`
    WithStats   bool       `json:"with-stats"`
    WithData    bool       `json:"with-data"`
    WithPadding bool       `json:"with-padding"`
}

type ApiQueryResponse struct {
    Queries []ApiQuery        `json:"queries,omitempty"`
    Results [][]ApiMetricData `json:"results"`
}

type ApiQuery struct {
    Type        *string      `json:"type,omitempty"`
    SubType     *string      `json:"subtype,omitempty"`
    Metric      string       `json:"metric"`
    Hostname    string       `json:"host"`
    Resolution  int64        `json:"resolution"`
    TypeIds     []string     `json:"type-ids,omitempty"`
    SubTypeIds  []string     `json:"subtype-ids,omitempty"`
    ScaleFactor schema.Float `json:"scale-by,omitempty"`
    Aggregate   bool         `json:"aggreg"`
}

func FetchData(req ApiQueryRequest) (*ApiQueryResponse, error) {
    // Data is always fetched (the assignment was repeated three times in a row here).
    req.WithData = true

    ms := GetMemoryStore()

    response := ApiQueryResponse{
        Results: make([][]ApiMetricData, 0, len(req.Queries)),
    }
    if req.ForAllNodes != nil {
        nodes := ms.ListChildren([]string{req.Cluster})
        for _, node := range nodes {
            for _, metric := range req.ForAllNodes {
                q := ApiQuery{
                    Metric:   metric,
                    Hostname: node,
                }
                req.Queries = append(req.Queries, q)
                response.Queries = append(response.Queries, q)
            }
        }
    }

    for _, query := range req.Queries {
        sels := make([]util.Selector, 0, 1)
        if query.Aggregate || query.Type == nil {
            sel := util.Selector{{String: req.Cluster}, {String: query.Hostname}}
            if query.Type != nil {
                if len(query.TypeIds) == 1 {
                    sel = append(sel, util.SelectorElement{String: *query.Type + query.TypeIds[0]})
                } else {
                    ids := make([]string, len(query.TypeIds))
                    for i, id := range query.TypeIds {
                        ids[i] = *query.Type + id
                    }
                    sel = append(sel, util.SelectorElement{Group: ids})
                }

                if query.SubType != nil {
                    if len(query.SubTypeIds) == 1 {
                        sel = append(sel, util.SelectorElement{String: *query.SubType + query.SubTypeIds[0]})
                    } else {
                        ids := make([]string, len(query.SubTypeIds))
                        for i, id := range query.SubTypeIds {
                            ids[i] = *query.SubType + id
                        }
                        sel = append(sel, util.SelectorElement{Group: ids})
                    }
                }
            }
            sels = append(sels, sel)
        } else {
            for _, typeId := range query.TypeIds {
                if query.SubType != nil {
                    for _, subTypeId := range query.SubTypeIds {
                        sels = append(sels, util.Selector{
                            {String: req.Cluster},
                            {String: query.Hostname},
                            {String: *query.Type + typeId},
                            {String: *query.SubType + subTypeId},
                        })
                    }
                } else {
                    sels = append(sels, util.Selector{
                        {String: req.Cluster},
                        {String: query.Hostname},
                        {String: *query.Type + typeId},
                    })
                }
            }
        }

        // log.Printf("query: %#v\n", query)
        // log.Printf("sels: %#v\n", sels)
        var err error
        res := make([]ApiMetricData, 0, len(sels))
        for _, sel := range sels {
            data := ApiMetricData{}

            data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)

            if err != nil {
                msg := err.Error()
                data.Error = &msg
                res = append(res, data)
                continue
            }

            if req.WithStats {
                data.AddStats()
            }
            if query.ScaleFactor != 0 {
                data.ScaleBy(query.ScaleFactor)
            }
            if req.WithPadding {
                data.PadDataWithNull(ms, req.From, req.To, query.Metric)
            }
            if !req.WithData {
                data.Data = nil
            }
            res = append(res, data)
        }
        response.Results = append(response.Results, res)
    }

    return &response, nil
}
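
A minimal sketch of how code in this package might call FetchData, assuming an initialized and populated memory store; cluster, host and metric names are placeholders:

// Query one metric on one node at 60s resolution over a one-hour window.
req := ApiQueryRequest{
    Cluster:   "fritz",
    From:      1675954000,
    To:        1675957600,
    WithStats: true,
    Queries: []ApiQuery{
        {Metric: "flops_any", Hostname: "f0649", Resolution: 60},
    },
}

resp, err := FetchData(req)
if err != nil {
    log.Fatal(err)
}
d := resp.Results[0][0]
log.Printf("flops_any: avg=%f min=%f max=%f", d.Avg, d.Min, d.Max)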

// handleDebug godoc
// @summary Debug endpoint
// @tags debug
// @description This endpoint allows users to print the content of
// nodes/clusters/metrics to review the state of the data.
// @produce json
// @param selector query string false "Selector"
// @success 200 {string} string "Debug dump"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /debug/ [post]
func HandleDebug(rw http.ResponseWriter, r *http.Request) {
    raw := r.URL.Query().Get("selector")
    rw.Header().Add("Content-Type", "application/json")
    selector := []string{}
    if len(raw) != 0 {
        selector = strings.Split(raw, ":")
    }

    ms := GetMemoryStore()
    if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil {
        handleError(err, http.StatusBadRequest, rw)
        return
    }
}
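
For completeness, a sketch of calling the debug endpoint; selector segments are joined with ':' as parsed above, and host, port and token are placeholders:

package main

import (
    "fmt"
    "io"
    "net/http"
)

func main() {
    req, _ := http.NewRequest(http.MethodPost,
        "http://localhost:8082/api/debug/?selector=fritz:f0649", nil)
    req.Header.Set("X-Auth-Token", "<token>") // placeholder credential

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    dump, _ := io.ReadAll(resp.Body)
    fmt.Println(string(dump)) // JSON tree of buffers per metric and level
}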

// handleHealthCheck godoc
// @summary HealthCheck endpoint
// @tags healthcheck
// @description This endpoint allows users to check whether a node is healthy
// @produce json
// @param cluster query string true "Cluster name"
// @param node query string true "Node name"
// @success 200 {string} string "Health check status"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /healthcheck/ [get]
func HandleHealthCheck(rw http.ResponseWriter, r *http.Request) {
    rawCluster := r.URL.Query().Get("cluster")
    rawNode := r.URL.Query().Get("node")

    if rawCluster == "" || rawNode == "" {
        handleError(errors.New("'cluster' and 'node' are required query parameters"), http.StatusBadRequest, rw)
        return
    }

    rw.Header().Add("Content-Type", "application/json")

    selector := []string{rawCluster, rawNode}

    ms := GetMemoryStore()
    if err := ms.HealthCheck(bufio.NewWriter(rw), selector); err != nil {
        handleError(err, http.StatusBadRequest, rw)
        return
    }
}
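
And correspondingly for the health check, which takes cluster and node as separate query parameters; again, everything concrete here is a placeholder:

package main

import (
    "fmt"
    "net/http"
)

func main() {
    req, _ := http.NewRequest(http.MethodGet,
        "http://localhost:8082/api/healthcheck/?cluster=fritz&node=f0649", nil)
    req.Header.Set("X-Auth-Token", "<token>") // placeholder credential

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    fmt.Println(resp.Status)
}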
192
internal/memorystore/archive.go
Normal file
@@ -0,0 +1,192 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package memorystore

import (
    "archive/zip"
    "bufio"
    "context"
    "errors"
    "fmt"
    "io"
    "log"
    "os"
    "path/filepath"
    "sync"
    "sync/atomic"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/config"
    cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)

func Archiving(wg *sync.WaitGroup, ctx context.Context) {
    go func() {
        defer wg.Done()
        d, err := time.ParseDuration(config.MetricStoreKeys.Archive.Interval)
        if err != nil {
            log.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err)
        }
        if d <= 0 {
            return
        }

        ticks := func() <-chan time.Time {
            if d <= 0 {
                return nil
            }
            return time.NewTicker(d).C
        }()
        for {
            select {
            case <-ctx.Done():
                return
            case <-ticks:
                t := time.Now().Add(-d)
                log.Printf("[METRICSTORE]> start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339))
                n, err := ArchiveCheckpoints(config.MetricStoreKeys.Checkpoints.RootDir,
                    config.MetricStoreKeys.Archive.RootDir, t.Unix(), config.MetricStoreKeys.Archive.DeleteInstead)

                if err != nil {
                    log.Printf("[METRICSTORE]> archiving failed: %s\n", err.Error())
                } else {
                    log.Printf("[METRICSTORE]> done: %d files zipped and moved to archive\n", n)
                }
            }
        }
    }()
}
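
The fields referenced here (Archive.Interval, Archive.RootDir, Archive.DeleteInstead, plus Checkpoints.RootDir) suggest a configuration fragment roughly like the following; the key spellings and values below are assumptions inferred from the field names, not taken from the documented config format:

"archive": {
    "interval": "48h",
    "directory": "./var/archive",
    "delete-instead": false
}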

var ErrNoNewData error = errors.New("all data already archived")

// ZIP all checkpoint files older than `from` together and write them to the `archiveDir`,
// deleting them from the `checkpointsDir`.
func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteInstead bool) (int, error) {
    entries1, err := os.ReadDir(checkpointsDir)
    if err != nil {
        return 0, err
    }

    type workItem struct {
        cdir, adir    string
        cluster, host string
    }

    var wg sync.WaitGroup
    n, errs := int32(0), int32(0)
    work := make(chan workItem, NumWorkers)

    wg.Add(NumWorkers)
    for worker := 0; worker < NumWorkers; worker++ {
        go func() {
            defer wg.Done()
            for workItem := range work {
                m, err := archiveCheckpoints(workItem.cdir, workItem.adir, from, deleteInstead)
                if err != nil {
                    cclog.Errorf("error while archiving %s/%s: %s", workItem.cluster, workItem.host, err.Error())
                    atomic.AddInt32(&errs, 1)
                }
                atomic.AddInt32(&n, int32(m))
            }
        }()
    }

    for _, de1 := range entries1 {
        entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
        if e != nil {
            err = e
        }

        for _, de2 := range entries2 {
            cdir := filepath.Join(checkpointsDir, de1.Name(), de2.Name())
            adir := filepath.Join(archiveDir, de1.Name(), de2.Name())
            work <- workItem{
                adir: adir, cdir: cdir,
                cluster: de1.Name(), host: de2.Name(),
            }
        }
    }

    close(work)
    wg.Wait()

    if err != nil {
        return int(n), err
    }

    if errs > 0 {
        return int(n), fmt.Errorf("%d errors happened while archiving (%d successes)", errs, n)
    }
    return int(n), nil
}
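
A direct invocation from maintenance code could look like this sketch (the signature matches the function above; both paths and the retention window are placeholders):

// Archive every checkpoint older than one week; keep the zipped files.
cutoff := time.Now().Add(-7 * 24 * time.Hour).Unix()
n, err := ArchiveCheckpoints("./var/checkpoints", "./var/archive", cutoff, false)
if err != nil {
    log.Printf("archiving failed: %s", err.Error())
} else {
    log.Printf("%d checkpoint files archived", n)
}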

// Helper function for `ArchiveCheckpoints`.
func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead bool) (int, error) {
    entries, err := os.ReadDir(dir)
    if err != nil {
        return 0, err
    }

    extension := config.MetricStoreKeys.Checkpoints.FileFormat
    files, err := findFiles(entries, from, extension, false)
    if err != nil {
        return 0, err
    }

    if deleteInstead {
        n := 0
        for _, checkpoint := range files {
            filename := filepath.Join(dir, checkpoint)
            if err = os.Remove(filename); err != nil {
                return n, err
            }
            n += 1
        }
        return n, nil
    }

    filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from))
    f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
    if err != nil && os.IsNotExist(err) {
        err = os.MkdirAll(archiveDir, 0o755)
        if err == nil {
            f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
        }
    }
    if err != nil {
        return 0, err
    }
    defer f.Close()
    bw := bufio.NewWriter(f)
    defer bw.Flush()
    zw := zip.NewWriter(bw)
    defer zw.Close()

    n := 0
    for _, checkpoint := range files {
        filename := filepath.Join(dir, checkpoint)
        r, err := os.Open(filename)
        if err != nil {
            return n, err
        }

        w, err := zw.Create(checkpoint)
        if err != nil {
            r.Close()
            return n, err
        }

        if _, err = io.Copy(w, r); err != nil {
            r.Close()
            return n, err
        }
        // Close each checkpoint file as soon as it is copied instead of
        // deferring, so file descriptors do not pile up over the loop.
        r.Close()

        if err = os.Remove(filename); err != nil {
            return n, err
        }
        n += 1
    }

    return n, nil
}
233
internal/memorystore/buffer.go
Normal file
@@ -0,0 +1,233 @@
package memorystore

import (
    "errors"
    "sync"

    "github.com/ClusterCockpit/cc-lib/schema"
)

// Default buffer capacity.
// `buffer.data` will only ever grow up to its capacity, and a new link
// in the buffer chain will be created if needed so that no copying
// of data or reallocation needs to happen on writes.
const (
    BUFFER_CAP int = 512
)

// So that we can reuse allocations
var bufferPool sync.Pool = sync.Pool{
    New: func() interface{} {
        return &buffer{
            data: make([]schema.Float, 0, BUFFER_CAP),
        }
    },
}

var (
    ErrNoData           error = errors.New("[METRICSTORE]> no data for this metric/level")
    ErrDataDoesNotAlign error = errors.New("[METRICSTORE]> data from lower granularities does not align")
)

// Each metric on each level has its own buffer.
// This is where the actual values go.
// If `cap(data)` is reached, a new buffer is created and
// becomes the new head of a buffer list.
type buffer struct {
    prev      *buffer
    next      *buffer
    data      []schema.Float
    frequency int64
    start     int64
    archived  bool
    closed    bool
}

func newBuffer(ts, freq int64) *buffer {
    b := bufferPool.Get().(*buffer)
    b.frequency = freq
    b.start = ts - (freq / 2)
    b.prev = nil
    b.next = nil
    b.archived = false
    b.closed = false
    b.data = b.data[:0]
    return b
}

// If a new buffer was created, the new head is returned.
// Otherwise, the existing buffer is returned.
// Normally, only "newer" data should be written, but if the value would
// end up in the same buffer anyway, it is allowed.
func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {
    if ts < b.start {
        return nil, errors.New("[METRICSTORE]> cannot write value to buffer from past")
    }

    // idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)
    idx := int((ts - b.start) / b.frequency)
    if idx >= cap(b.data) {
        newbuf := newBuffer(ts, b.frequency)
        newbuf.prev = b
        b.next = newbuf
        b.close()
        b = newbuf
        idx = 0
    }

    // Overwriting value or writing value from past
    if idx < len(b.data) {
        b.data[idx] = value
        return b, nil
    }

    // Fill up unwritten slots with NaN
    for i := len(b.data); i < idx; i++ {
        b.data = append(b.data, schema.NaN)
    }

    b.data = append(b.data, value)
    return b, nil
}
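
The index arithmetic in write maps a timestamp to a slot relative to the buffer start, which newBuffer shifts back by half a frequency interval so that samples arriving slightly early or late still land in the intended slot. A small standalone sketch of that mapping with an assumed 60-second frequency:

package main

import "fmt"

func main() {
    const freq int64 = 60
    ts0 := int64(1675954353) // timestamp of the first sample
    start := ts0 - freq/2    // as in newBuffer

    for _, ts := range []int64{ts0, ts0 + 29, ts0 + 31, ts0 + 120} {
        idx := (ts - start) / freq // as in buffer.write
        fmt.Printf("sample at +%ds -> slot %d\n", ts-ts0, idx)
    }
    // Output: slots 0, 0, 1, 2 -- each sample lands in the slot whose
    // nominal timestamp (a multiple of freq after ts0) is nearest.
}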

func (b *buffer) end() int64 {
    return b.firstWrite() + int64(len(b.data))*b.frequency
}

func (b *buffer) firstWrite() int64 {
    return b.start + (b.frequency / 2)
}

func (b *buffer) close() {}

/*
func (b *buffer) close() {
    if b.closed {
        return
    }

    b.closed = true
    n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64
    for _, x := range b.data {
        if x.IsNaN() {
            continue
        }

        n += 1
        f := float64(x)
        sum += f
        min = math.Min(min, f)
        max = math.Max(max, f)
    }

    b.statistics.samples = n
    if n > 0 {
        b.statistics.avg = Float(sum / float64(n))
        b.statistics.min = Float(min)
        b.statistics.max = Float(max)
    } else {
        b.statistics.avg = NaN
        b.statistics.min = NaN
        b.statistics.max = NaN
    }
}
*/

// func interpolate(idx int, data []Float) Float {
//     if idx == 0 || idx+1 == len(data) {
//         return NaN
//     }
//     return (data[idx-1] + data[idx+1]) / 2.0
// }

// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
// Simple linear interpolation is done between the two neighboring cells if possible.
// If values at the start or end are missing, instead of NaN values, the second and third
// return values contain the actual `from`/`to`.
// This function goes back the buffer chain if `from` is older than the current buffer's start.
// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
// If `data` is not long enough to hold all values, this function will panic!
func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {
    if from < b.firstWrite() {
        if b.prev != nil {
            return b.prev.read(from, to, data)
        }
        from = b.firstWrite()
    }

    i := 0
    t := from
    for ; t < to; t += b.frequency {
        idx := int((t - b.start) / b.frequency)
        if idx >= cap(b.data) {
            if b.next == nil {
                break
            }
            b = b.next
            idx = 0
        }

        if idx >= len(b.data) {
            if b.next == nil || to <= b.next.start {
                break
            }
            data[i] += schema.NaN
        } else if t < b.start {
            data[i] += schema.NaN
            // } else if b.data[idx].IsNaN() {
            //     data[i] += interpolate(idx, b.data)
        } else {
            data[i] += b.data[idx]
        }
        i++
    }

    return data[:i], from, t, nil
}

// Returns true if this buffer needs to be freed.
func (b *buffer) free(t int64) (delme bool, n int) {
    if b.prev != nil {
        delme, m := b.prev.free(t)
        n += m
        if delme {
            b.prev.next = nil
            if cap(b.prev.data) == BUFFER_CAP {
                bufferPool.Put(b.prev)
            }
            b.prev = nil
        }
    }

    end := b.end()
    if end < t {
        return true, n + 1
    }

    return false, n
}

// Call `callback` on every buffer that contains data in the range from `from` to `to`.
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
    if b == nil {
        return nil
    }

    if err := b.prev.iterFromTo(from, to, callback); err != nil {
        return err
    }

    if from <= b.end() && b.start <= to {
        return callback(b)
    }

    return nil
}

func (b *buffer) count() int64 {
    res := int64(len(b.data))
    if b.prev != nil {
        res += b.prev.count()
    }
    return res
}
765
internal/memorystore/checkpoint.go
Normal file
@@ -0,0 +1,765 @@
package memorystore

import (
    "bufio"
    "context"
    "encoding/json"
    "errors"
    "fmt"
    "io/fs"
    "log"
    "os"
    "path"
    "path/filepath"
    "runtime"
    "sort"
    "strconv"
    "strings"
    "sync"
    "sync/atomic"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/avro"
    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-lib/schema"
    "github.com/linkedin/goavro/v2"
)

// Whenever changed, update MarshalJSON as well!
type CheckpointMetrics struct {
    Data      []schema.Float `json:"data"`
    Frequency int64          `json:"frequency"`
    Start     int64          `json:"start"`
}

type CheckpointFile struct {
    Metrics  map[string]*CheckpointMetrics `json:"metrics"`
    Children map[string]*CheckpointFile    `json:"children"`
    From     int64                         `json:"from"`
    To       int64                         `json:"to"`
}

var lastCheckpoint time.Time

func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
    lastCheckpoint = time.Now()

    if config.MetricStoreKeys.Checkpoints.FileFormat == "json" {
        ms := GetMemoryStore()

        go func() {
            defer wg.Done()
            d, err := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Interval)
            if err != nil {
                log.Fatal(err)
            }
            if d <= 0 {
                return
            }

            ticks := func() <-chan time.Time {
                if d <= 0 {
                    return nil
                }
                return time.NewTicker(d).C
            }()
            for {
                select {
                case <-ctx.Done():
                    return
                case <-ticks:
                    log.Printf("[METRICSTORE]> start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339))
                    now := time.Now()
                    n, err := ms.ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir,
                        lastCheckpoint.Unix(), now.Unix())
                    if err != nil {
                        log.Printf("[METRICSTORE]> checkpointing failed: %s\n", err.Error())
                    } else {
                        log.Printf("[METRICSTORE]> done: %d checkpoint files created\n", n)
                        lastCheckpoint = now
                    }
                }
            }
        }()
    } else {
        go func() {
            defer wg.Done()
            d, _ := time.ParseDuration("1m")

            select {
            case <-ctx.Done():
                return
            case <-time.After(time.Duration(avro.CheckpointBufferMinutes) * time.Minute):
                // The first tick waits until data has been collected for the
                // configured number of buffer minutes.
                avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, false)
                // log.Printf("Checkpointing %d avro files", count)

            }

            ticks := func() <-chan time.Time {
                if d <= 0 {
                    return nil
                }
                return time.NewTicker(d).C
            }()

            for {
                select {
                case <-ctx.Done():
                    return
                case <-ticks:
                    // Regular ticks of 1 minute to write data.
                    avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, false)
                    // log.Printf("Checkpointing %d avro files", count)
                }
            }
        }()
    }
}
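
The checkpoint keys used above (Interval, RootDir, FileFormat, plus avro.CheckpointBufferMinutes) hint at a configuration fragment along these lines; key names and values are assumptions for illustration only:

"checkpoints": {
    "interval": "12h",
    "directory": "./var/checkpoints",
    "file-format": "avro"
}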

// As `Float` implements a custom MarshalJSON() function,
// serializing an array of such types has more overhead
// than one would assume (because of extra allocations, interfaces and so on).
func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {
    buf := make([]byte, 0, 128+len(cm.Data)*8)
    buf = append(buf, `{"frequency":`...)
    buf = strconv.AppendInt(buf, cm.Frequency, 10)
    buf = append(buf, `,"start":`...)
    buf = strconv.AppendInt(buf, cm.Start, 10)
    buf = append(buf, `,"data":[`...)
    for i, x := range cm.Data {
        if i != 0 {
            buf = append(buf, ',')
        }
        if x.IsNaN() {
            buf = append(buf, `null`...)
        } else {
            buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)
        }
    }
    buf = append(buf, `]}`...)
    return buf, nil
}
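
For a sense of the resulting encoding: a CheckpointMetrics value with frequency 60, start 1675954353 and data [1.5, NaN, 2.0] would serialize to the line below (values invented; NaN becomes null, and floats are written with one decimal place as per the AppendFloat call):

{"frequency":60,"start":1675954353,"data":[1.5,null,2.0]}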

// Metrics stored at the lowest 2 levels are not stored away (root and cluster)!
// On a per-host basis a new JSON file is created. I have no idea if this will scale.
// The good thing: Only a host at a time is locked, so this function can run
// in parallel to writes/reads.
func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
    levels := make([]*Level, 0)
    selectors := make([][]string, 0)
    m.root.lock.RLock()
    for sel1, l1 := range m.root.children {
        l1.lock.RLock()
        for sel2, l2 := range l1.children {
            levels = append(levels, l2)
            selectors = append(selectors, []string{sel1, sel2})
        }
        l1.lock.RUnlock()
    }
    m.root.lock.RUnlock()

    type workItem struct {
        level    *Level
        dir      string
        selector []string
    }

    n, errs := int32(0), int32(0)

    var wg sync.WaitGroup
    wg.Add(NumWorkers)
    work := make(chan workItem, NumWorkers*2)
    for worker := 0; worker < NumWorkers; worker++ {
        go func() {
            defer wg.Done()

            for workItem := range work {
                if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {
                    if err == ErrNoNewData {
                        continue
                    }

                    log.Printf("[METRICSTORE]> error while checkpointing %#v: %s", workItem.selector, err.Error())
                    atomic.AddInt32(&errs, 1)
                } else {
                    atomic.AddInt32(&n, 1)
                }
            }
        }()
    }

    for i := 0; i < len(levels); i++ {
        dir := path.Join(dir, path.Join(selectors[i]...))
        work <- workItem{
            level:    levels[i],
            dir:      dir,
            selector: selectors[i],
        }
    }

    close(work)
    wg.Wait()

    if errs > 0 {
        return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)", errs, n)
    }
    return int(n), nil
}

func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
    l.lock.RLock()
    defer l.lock.RUnlock()

    retval := &CheckpointFile{
        From:     from,
        To:       to,
        Metrics:  make(map[string]*CheckpointMetrics),
        Children: make(map[string]*CheckpointFile),
    }

    for metric, minfo := range m.Metrics {
        b := l.metrics[minfo.Offset]
        if b == nil {
            continue
        }

        allArchived := true
        b.iterFromTo(from, to, func(b *buffer) error {
            if !b.archived {
                allArchived = false
            }
            return nil
        })

        if allArchived {
            continue
        }

        data := make([]schema.Float, (to-from)/b.frequency+1)
        data, start, end, err := b.read(from, to, data)
        if err != nil {
            return nil, err
        }

        for i := int((end - start) / b.frequency); i < len(data); i++ {
            data[i] = schema.NaN
        }

        retval.Metrics[metric] = &CheckpointMetrics{
            Frequency: b.frequency,
            Start:     start,
            Data:      data,
        }
    }

    for name, child := range l.children {
        val, err := child.toCheckpointFile(from, to, m)
        if err != nil {
            return nil, err
        }

        if val != nil {
            retval.Children[name] = val
        }
    }

    if len(retval.Children) == 0 && len(retval.Metrics) == 0 {
        return nil, nil
    }

    return retval, nil
}

func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
    cf, err := l.toCheckpointFile(from, to, m)
    if err != nil {
        return err
    }

    if cf == nil {
        return ErrNoNewData
    }

    filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
    f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
    if err != nil && os.IsNotExist(err) {
        err = os.MkdirAll(dir, 0o755)
        if err == nil {
            f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
        }
    }
    if err != nil {
        return err
    }
    defer f.Close()

    bw := bufio.NewWriter(f)
    if err = json.NewEncoder(bw).Encode(cf); err != nil {
        return err
    }

    return bw.Flush()
}

func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) {
    var wg sync.WaitGroup
    work := make(chan [2]string, NumWorkers)
    n, errs := int32(0), int32(0)

    wg.Add(NumWorkers)
    for worker := 0; worker < NumWorkers; worker++ {
        go func() {
            defer wg.Done()
            for host := range work {
                lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))
                nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from, extension)
                if err != nil {
                    // Do not log.Fatalf here: that would exit the process from
                    // inside a worker goroutine; count the error instead.
                    log.Printf("[METRICSTORE]> error while loading checkpoints: %s", err.Error())
                    atomic.AddInt32(&errs, 1)
                }
                atomic.AddInt32(&n, int32(nn))
            }
        }()
    }

    i := 0
    clustersDir, err := os.ReadDir(dir)
    for _, clusterDir := range clustersDir {
        if !clusterDir.IsDir() {
            err = errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
            goto done
        }

        hostsDir, e := os.ReadDir(filepath.Join(dir, clusterDir.Name()))
        if e != nil {
            err = e
            goto done
        }

        for _, hostDir := range hostsDir {
            if !hostDir.IsDir() {
                err = errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
                goto done
            }

            i++
            if i%NumWorkers == 0 && i > 100 {
                // Forcing garbage collection runs here regularly during the loading of checkpoints
                // will decrease the total heap size after loading everything back to memory is done.
                // While loading data, the heap will grow fast, so the GC target size will double
                // almost always. By forcing GCs here, we can keep it growing more slowly so that
                // at the end, less memory is wasted.
                runtime.GC()
            }

            work <- [2]string{clusterDir.Name(), hostDir.Name()}
        }
    }
done:
    close(work)
    wg.Wait()

    if err != nil {
        return int(n), err
    }

    if errs > 0 {
        return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while loading checkpoints (%d successes)", errs, n)
    }
    return int(n), nil
}

// Metrics stored at the lowest 2 levels are not loaded (root and cluster)!
// This function can only be called once and before the very first write or read.
// Different hosts' data is loaded to memory in parallel.
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
    if _, err := os.Stat(dir); os.IsNotExist(err) {
        // The directory does not exist, so create it using os.MkdirAll()
        err := os.MkdirAll(dir, 0755) // 0755 sets the permissions for the directory
        if err != nil {
            log.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
        }
        log.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir)
    }

    // Config read (replace with your actual config read)
    fileFormat := config.MetricStoreKeys.Checkpoints.FileFormat
    if fileFormat == "" {
        fileFormat = "avro"
    }

    // Map to easily get the fallback format
    oppositeFormat := map[string]string{
        "json": "avro",
        "avro": "json",
    }

    // First, attempt to load the specified format
    if found, err := checkFilesWithExtension(dir, fileFormat); err != nil {
        return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
    } else if found {
        log.Printf("[METRICSTORE]> Loading %s files because fileformat is %s\n", fileFormat, fileFormat)
        return m.FromCheckpoint(dir, from, fileFormat)
    }

    // If not found, attempt the opposite format
    altFormat := oppositeFormat[fileFormat]
    if found, err := checkFilesWithExtension(dir, altFormat); err != nil {
        return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
    } else if found {
        log.Printf("[METRICSTORE]> Loading %s files but fileformat is %s\n", altFormat, fileFormat)
        return m.FromCheckpoint(dir, from, altFormat)
    }

    log.Println("[METRICSTORE]> No valid checkpoint files found in the directory.")
    return 0, nil
}

func checkFilesWithExtension(dir string, extension string) (bool, error) {
    found := false

    err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return fmt.Errorf("[METRICSTORE]> error accessing path %s: %v", path, err)
        }
        if !info.IsDir() && filepath.Ext(info.Name()) == "."+extension {
            found = true
            return nil
        }
        return nil
    })
    if err != nil {
        return false, fmt.Errorf("[METRICSTORE]> error walking through directories: %s", err)
    }

    return found, nil
}

func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
    br := bufio.NewReader(f)

    fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:]
    resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64)
    if err != nil {
        return fmt.Errorf("[METRICSTORE]> error while reading avro file (resolution parsing) : %s", err)
    }

    from_timestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64)
    if err != nil {
        return fmt.Errorf("[METRICSTORE]> error converting timestamp from the avro file : %s", err)
    }

    // Same logic according to lineprotocol
    from_timestamp -= (resolution / 2)

    // fmt.Printf("File : %s with resolution : %d\n", fileName, resolution)

    var recordCounter int64 = 0

    // Create a new OCF reader from the buffered reader
    ocfReader, err := goavro.NewOCFReader(br)
    if err != nil {
        return fmt.Errorf("[METRICSTORE]> error creating OCF reader : %s", err)
    }

    metricsData := make(map[string]schema.FloatArray)

    for ocfReader.Scan() {
        datum, err := ocfReader.Read()
        if err != nil {
            return fmt.Errorf("[METRICSTORE]> error while reading avro file : %s", err)
        }

        record, ok := datum.(map[string]interface{})
        if !ok {
            return errors.New("[METRICSTORE]> failed to assert datum as map[string]interface{}")
        }

        for key, value := range record {
            metricsData[key] = append(metricsData[key], schema.ConvertToFloat(value.(float64)))
        }

        recordCounter += 1
    }

    to := (from_timestamp + (recordCounter / (60 / resolution) * 60))
    if to < from {
        return nil
    }

    for key, floatArray := range metricsData {
        metricName := avro.ReplaceKey(key)

        if strings.Contains(metricName, avro.Delimiter) {
            subString := strings.Split(metricName, avro.Delimiter)

            lvl := l

            for i := 0; i < len(subString)-1; i++ {
                sel := subString[i]

                if lvl.children == nil {
                    lvl.children = make(map[string]*Level)
                }

                child, ok := lvl.children[sel]
                if !ok {
                    child = &Level{
                        metrics:  make([]*buffer, len(m.Metrics)),
                        children: nil,
                    }
                    lvl.children[sel] = child
                }
                lvl = child
            }

            leafMetricName := subString[len(subString)-1]
            err = lvl.createBuffer(m, leafMetricName, floatArray, from_timestamp, resolution)
            if err != nil {
                return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
            }
        } else {
            err = l.createBuffer(m, metricName, floatArray, from_timestamp, resolution)
            if err != nil {
                return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
            }
        }
    }

    return nil
}

func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schema.FloatArray, from int64, resolution int64) error {
    n := len(floatArray)
    b := &buffer{
        frequency: resolution,
        start:     from,
        data:      floatArray[0:n:n],
        prev:      nil,
        next:      nil,
        archived:  true,
    }
    b.close()

    minfo, ok := m.Metrics[metricName]
    if !ok {
        return nil
        // return errors.New("Unknown metric: " + name)
    }

    prev := l.metrics[minfo.Offset]
    if prev == nil {
        l.metrics[minfo.Offset] = b
    } else {
        if prev.start > b.start {
            return errors.New("[METRICSTORE]> existing buffer starts later than the loaded one")
        }

        b.prev = prev
        prev.next = b

        missingCount := ((int(b.start) - int(prev.start)) - len(prev.data)*int(b.frequency))
        if missingCount > 0 {
            missingCount /= int(b.frequency)

            for range missingCount {
                prev.data = append(prev.data, schema.NaN)
            }

            prev.data = prev.data[0:len(prev.data):len(prev.data)]
        }
    }
    l.metrics[minfo.Offset] = b

    return nil
}

func (l *Level) loadJsonFile(m *MemoryStore, f *os.File, from int64) error {
    br := bufio.NewReader(f)
    cf := &CheckpointFile{}
    if err := json.NewDecoder(br).Decode(cf); err != nil {
        return err
    }

    if cf.To != 0 && cf.To < from {
        return nil
    }

    if err := l.loadFile(cf, m); err != nil {
        return err
    }

    return nil
}

func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
    for name, metric := range cf.Metrics {
        n := len(metric.Data)
        b := &buffer{
            frequency: metric.Frequency,
            start:     metric.Start,
            data:      metric.Data[0:n:n], // Space is wasted here :(
            prev:      nil,
            next:      nil,
            archived:  true,
        }
        b.close()

        minfo, ok := m.Metrics[name]
        if !ok {
            continue
            // return errors.New("Unknown metric: " + name)
        }

        prev := l.metrics[minfo.Offset]
        if prev == nil {
            l.metrics[minfo.Offset] = b
        } else {
            if prev.start > b.start {
                return errors.New("[METRICSTORE]> existing buffer starts later than the loaded one")
            }

            b.prev = prev
            prev.next = b
        }
        l.metrics[minfo.Offset] = b
    }

    if len(cf.Children) > 0 && l.children == nil {
        l.children = make(map[string]*Level)
    }

    for sel, childCf := range cf.Children {
        child, ok := l.children[sel]
        if !ok {
            child = &Level{
                metrics:  make([]*buffer, len(m.Metrics)),
                children: nil,
            }
            l.children[sel] = child
        }

        if err := child.loadFile(childCf, m); err != nil {
            return err
        }
    }

    return nil
}
func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension string) (int, error) {
    direntries, err := os.ReadDir(dir)
    if err != nil {
        if os.IsNotExist(err) {
            return 0, nil
        }

        return 0, err
    }

    allFiles := make([]fs.DirEntry, 0)
    filesLoaded := 0
    for _, e := range direntries {
        if e.IsDir() {
            child := &Level{
                metrics:  make([]*buffer, len(m.Metrics)),
                children: make(map[string]*Level),
            }

            files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from, extension)
            filesLoaded += files
            if err != nil {
                return filesLoaded, err
            }

            l.children[e.Name()] = child
        } else if strings.HasSuffix(e.Name(), "."+extension) {
            allFiles = append(allFiles, e)
        } else {
            continue
        }
    }

    files, err := findFiles(allFiles, from, extension, true)
    if err != nil {
        return filesLoaded, err
    }

    loaders := map[string]func(*MemoryStore, *os.File, int64) error{
        "json": l.loadJsonFile,
        "avro": l.loadAvroFile,
    }

    loader := loaders[extension]

    for _, filename := range files {
        f, err := os.Open(path.Join(dir, filename))
        if err != nil {
            return filesLoaded, err
        }

        // Close each file right after loading it; a deferred close inside the
        // loop would keep every file open until the function returns.
        err = loader(m, f, from)
        f.Close()
        if err != nil {
            return filesLoaded, err
        }

        filesLoaded += 1
    }

    return filesLoaded, nil
}

// This will probably get very slow over time!
// A solution could be some sort of an index file in which all other files
// and the timespans they contain are listed.
func findFiles(direntries []fs.DirEntry, t int64, extension string, findMoreRecentFiles bool) ([]string, error) {
    nums := map[string]int64{}
    for _, e := range direntries {
        if !strings.HasSuffix(e.Name(), "."+extension) {
            continue
        }

        ts, err := strconv.ParseInt(e.Name()[strings.Index(e.Name(), "_")+1:len(e.Name())-5], 10, 64)
        if err != nil {
            return nil, err
        }
        nums[e.Name()] = ts
    }

    sort.Slice(direntries, func(i, j int) bool {
        a, b := direntries[i], direntries[j]
        return nums[a.Name()] < nums[b.Name()]
    })

    filenames := make([]string, 0)
    for i := 0; i < len(direntries); i++ {
        e := direntries[i]
        ts1 := nums[e.Name()]

        if findMoreRecentFiles && t <= ts1 {
            filenames = append(filenames, e.Name())
        }
        if i == len(direntries)-1 {
            continue
        }

        enext := direntries[i+1]
        ts2 := nums[enext.Name()]

        if findMoreRecentFiles {
            if ts1 < t && t < ts2 {
                filenames = append(filenames, e.Name())
            }
        } else {
            if ts2 < t {
                filenames = append(filenames, e.Name())
            }
        }
    }

    return filenames, nil
}
107
internal/memorystore/debug.go
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (b *buffer) debugDump(buf []byte) []byte {
|
||||||
|
if b.prev != nil {
|
||||||
|
buf = b.prev.debugDump(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
start, len, end := b.start, len(b.data), b.start+b.frequency*int64(len(b.data))
|
||||||
|
buf = append(buf, `{"start":`...)
|
||||||
|
buf = strconv.AppendInt(buf, start, 10)
|
||||||
|
buf = append(buf, `,"len":`...)
|
||||||
|
buf = strconv.AppendInt(buf, int64(len), 10)
|
||||||
|
buf = append(buf, `,"end":`...)
|
||||||
|
buf = strconv.AppendInt(buf, end, 10)
|
||||||
|
if b.archived {
|
||||||
|
buf = append(buf, `,"saved":true`...)
|
||||||
|
}
|
||||||
|
if b.next != nil {
|
||||||
|
buf = append(buf, `},`...)
|
||||||
|
} else {
|
||||||
|
buf = append(buf, `}`...)
|
||||||
|
}
|
||||||
|
return buf
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf []byte, depth int) ([]byte, error) {
|
||||||
|
l.lock.RLock()
|
||||||
|
defer l.lock.RUnlock()
|
||||||
|
for i := 0; i < depth; i++ {
|
||||||
|
buf = append(buf, '\t')
|
||||||
|
}
|
||||||
|
buf = append(buf, '"')
|
||||||
|
buf = append(buf, lvlname...)
|
||||||
|
buf = append(buf, "\":{\n"...)
|
||||||
|
depth += 1
|
||||||
|
objitems := 0
|
||||||
|
for name, mc := range m.Metrics {
|
||||||
|
if b := l.metrics[mc.Offset]; b != nil {
|
||||||
|
for i := 0; i < depth; i++ {
|
||||||
|
buf = append(buf, '\t')
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = append(buf, '"')
|
||||||
|
buf = append(buf, name...)
|
||||||
|
buf = append(buf, `":[`...)
|
||||||
|
buf = b.debugDump(buf)
|
||||||
|
buf = append(buf, "],\n"...)
|
||||||
|
objitems++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for name, lvl := range l.children {
|
||||||
|
_, err := w.Write(buf)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = buf[0:0]
|
||||||
|
buf, err = lvl.debugDump(m, w, name, buf, depth)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = append(buf, ',', '\n')
|
||||||
|
objitems++
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove final `,`:
|
||||||
|
if objitems > 0 {
|
||||||
|
buf = append(buf[0:len(buf)-1], '\n')
|
||||||
|
}
|
||||||
|
|
||||||
|
depth -= 1
|
||||||
|
for i := 0; i < depth; i++ {
|
||||||
|
buf = append(buf, '\t')
|
||||||
|
}
|
||||||
|
buf = append(buf, '}')
|
||||||
|
return buf, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MemoryStore) DebugDump(w *bufio.Writer, selector []string) error {
|
||||||
|
lvl := m.root.findLevel(selector)
|
||||||
|
if lvl == nil {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
|
||||||
|
}
|
||||||
|
|
||||||
|
buf := make([]byte, 0, 2048)
|
||||||
|
buf = append(buf, "{"...)
|
||||||
|
|
||||||
|
buf, err := lvl.debugDump(m, w, "data", buf, 0)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = append(buf, "}\n"...)
|
||||||
|
if _, err = w.Write(buf); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return w.Flush()
|
||||||
|
}
|
||||||
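As orientation, a minimal usage sketch (assumed, not part of this change) that drives the exported DebugDump entry point; the {cluster, host} selector layout follows the conventions of this package:

package main

import (
	"bufio"
	"log"
	"os"

	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
)

func main() {
	// Assumes the store was already initialized via memorystore.Init/InitMetrics.
	ms := memorystore.GetMemoryStore()
	w := bufio.NewWriter(os.Stdout)
	// Dump the buffer layout of one host as pseudo-JSON to stdout.
	if err := ms.DebugDump(w, []string{"emmy", "host123"}); err != nil {
		log.Println(err)
	}
}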
88 internal/memorystore/healthcheck.go Normal file
@@ -0,0 +1,88 @@
package memorystore

import (
	"bufio"
	"fmt"
	"time"
)

// MaxMissingDataPoints is a threshold that allows a node to be healthy even
// though a certain number of data points are missing. If a node did not
// receive the last 5 data points, the healthCheck endpoint will still report
// it as healthy. Anything more than 5 missing points in a metric of the node
// deems the node unhealthy.
const MaxMissingDataPoints int64 = 5

// MaxUnhealthyMetrics is a threshold that allows up to a certain number of
// metrics on a node to be unhealthy. It works together with
// MaxMissingDataPoints: if fewer than 5 metrics (including submetrics) are
// missing their last MaxMissingDataPoints data points, the node is still
// deemed healthy; any more deem it unhealthy.
const MaxUnhealthyMetrics int64 = 5

func (b *buffer) healthCheck() int64 {
	// Check if the buffer is empty
	if b.data == nil {
		return 1
	}

	bufferEnd := b.start + b.frequency*int64(len(b.data))
	t := time.Now().Unix()

	// Check if the buffer is too old
	if t-bufferEnd > MaxMissingDataPoints*b.frequency {
		return 1
	}

	return 0
}

func (l *Level) healthCheck(m *MemoryStore, count int64) (int64, error) {
	l.lock.RLock()
	defer l.lock.RUnlock()

	for _, mc := range m.Metrics {
		if b := l.metrics[mc.Offset]; b != nil {
			count += b.healthCheck()
		}
	}

	for _, lvl := range l.children {
		c, err := lvl.healthCheck(m, 0)
		if err != nil {
			return 0, err
		}
		count += c
	}

	return count, nil
}

func (m *MemoryStore) HealthCheck(w *bufio.Writer, selector []string) error {
	lvl := m.root.findLevel(selector)
	if lvl == nil {
		return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
	}

	buf := make([]byte, 0, 25)
	// buf = append(buf, "{"...)

	var count int64 = 0

	unhealthyMetricsCount, err := lvl.healthCheck(m, count)
	if err != nil {
		return err
	}

	if unhealthyMetricsCount < MaxUnhealthyMetrics {
		buf = append(buf, "Healthy"...)
	} else {
		buf = append(buf, "Unhealthy"...)
	}

	// buf = append(buf, "}\n"...)

	if _, err = w.Write(buf); err != nil {
		return err
	}

	return w.Flush()
}
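To make the two thresholds concrete, a hedged sketch (handler path and selector values assumed, not part of this change) of exposing HealthCheck over HTTP; a node reports "Unhealthy" once five or more of its metrics have missed their last five expected data points:

package main

import (
	"bufio"
	"net/http"

	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
)

func main() {
	http.HandleFunc("/health", func(rw http.ResponseWriter, r *http.Request) {
		ms := memorystore.GetMemoryStore()
		w := bufio.NewWriter(rw)
		// Selector layout is {cluster, host}; values here are placeholders.
		if err := ms.HealthCheck(w, []string{"emmy", "host123"}); err != nil {
			http.Error(rw, err.Error(), http.StatusNotFound)
		}
	})
	http.ListenAndServe(":8080", nil)
}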
187 internal/memorystore/level.go Normal file
@@ -0,0 +1,187 @@
package memorystore

import (
	"sync"
	"unsafe"

	"github.com/ClusterCockpit/cc-lib/util"
)

// Could also be called "node" as this forms a node in a tree structure.
// Called Level because "node" might be confusing here.
// Can be both a leaf or an inner node. In this tree structure, inner nodes can
// also hold data (in `metrics`).
type Level struct {
	children map[string]*Level
	metrics  []*buffer
	lock     sync.RWMutex
}

// Find the correct level for the given selector, creating it if
// it does not exist. An example selector in the context of
// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }.
// This function would probably benefit a lot from `level.children` being a `sync.Map`?
func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {
	if len(selector) == 0 {
		return l
	}

	// Allow concurrent reads:
	l.lock.RLock()
	var child *Level
	var ok bool
	if l.children == nil {
		// Children map needs to be created...
		l.lock.RUnlock()
	} else {
		child, ok := l.children[selector[0]]
		l.lock.RUnlock()
		if ok {
			return child.findLevelOrCreate(selector[1:], nMetrics)
		}
	}

	// The level does not exist, take write lock for unique access:
	l.lock.Lock()
	// While this thread waited for the write lock, another thread
	// could have created the child node.
	if l.children != nil {
		child, ok = l.children[selector[0]]
		if ok {
			l.lock.Unlock()
			return child.findLevelOrCreate(selector[1:], nMetrics)
		}
	}

	child = &Level{
		metrics:  make([]*buffer, nMetrics),
		children: nil,
	}

	if l.children != nil {
		l.children[selector[0]] = child
	} else {
		l.children = map[string]*Level{selector[0]: child}
	}
	l.lock.Unlock()
	return child.findLevelOrCreate(selector[1:], nMetrics)
}

func (l *Level) free(t int64) (int, error) {
	l.lock.Lock()
	defer l.lock.Unlock()

	n := 0
	for i, b := range l.metrics {
		if b != nil {
			delme, m := b.free(t)
			n += m
			if delme {
				if cap(b.data) == BUFFER_CAP {
					bufferPool.Put(b)
				}
				l.metrics[i] = nil
			}
		}
	}

	for _, l := range l.children {
		m, err := l.free(t)
		n += m
		if err != nil {
			return n, err
		}
	}

	return n, nil
}

func (l *Level) sizeInBytes() int64 {
	l.lock.RLock()
	defer l.lock.RUnlock()
	size := int64(0)

	for _, b := range l.metrics {
		if b != nil {
			size += b.count() * int64(unsafe.Sizeof(util.Float(0)))
		}
	}

	for _, child := range l.children {
		size += child.sizeInBytes()
	}

	return size
}

func (l *Level) findLevel(selector []string) *Level {
	if len(selector) == 0 {
		return l
	}

	l.lock.RLock()
	defer l.lock.RUnlock()

	lvl := l.children[selector[0]]
	if lvl == nil {
		return nil
	}

	return lvl.findLevel(selector[1:])
}

func (l *Level) findBuffers(selector util.Selector, offset int, f func(b *buffer) error) error {
	l.lock.RLock()
	defer l.lock.RUnlock()

	if len(selector) == 0 {
		b := l.metrics[offset]
		if b != nil {
			return f(b)
		}

		for _, lvl := range l.children {
			err := lvl.findBuffers(nil, offset, f)
			if err != nil {
				return err
			}
		}
		return nil
	}

	sel := selector[0]
	if len(sel.String) != 0 && l.children != nil {
		lvl, ok := l.children[sel.String]
		if ok {
			err := lvl.findBuffers(selector[1:], offset, f)
			if err != nil {
				return err
			}
		}
		return nil
	}

	if sel.Group != nil && l.children != nil {
		for _, key := range sel.Group {
			lvl, ok := l.children[key]
			if ok {
				err := lvl.findBuffers(selector[1:], offset, f)
				if err != nil {
					return err
				}
			}
		}
		return nil
	}

	if sel.Any && l.children != nil {
		for _, lvl := range l.children {
			if err := lvl.findBuffers(selector[1:], offset, f); err != nil {
				return err
			}
		}
		return nil
	}

	return nil
}
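To illustrate how the tree and the two selector flavors interact, a small sketch; the names are invented, and the element fields String/Group/Any mirror their use in findBuffers above:

// Writing with a plain []string selector creates levels on demand:
//   {"emmy", "host123", "cpu0"}  ->  cluster / host / type
//
// Reading with a util.Selector can then match several branches at once:
sel := util.Selector{
	{String: "emmy"},                         // exactly this cluster
	{Group: []string{"host123", "host124"}},  // any of these hosts
	{Any: true},                              // every child below, e.g. cpu0, cpu1, ...
}
_ = sel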
347 internal/memorystore/lineprotocol.go Normal file
@@ -0,0 +1,347 @@
package memorystore

import (
	"context"
	"fmt"
	"log"
	"sync"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/avro"
	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-lib/schema"
	"github.com/influxdata/line-protocol/v2/lineprotocol"
	"github.com/nats-io/nats.go"
)

// Each connection is handled in its own goroutine. This is a blocking function.
// func ReceiveRaw(ctx context.Context,
// 	listener net.Listener,
// 	handleLine func(*lineprotocol.Decoder, string) error,
// ) error {
// 	var wg sync.WaitGroup
//
// 	wg.Add(1)
// 	go func() {
// 		defer wg.Done()
// 		<-ctx.Done()
// 		if err := listener.Close(); err != nil {
// 			log.Printf("listener.Close(): %s", err.Error())
// 		}
// 	}()
//
// 	for {
// 		conn, err := listener.Accept()
// 		if err != nil {
// 			if errors.Is(err, net.ErrClosed) {
// 				break
// 			}
//
// 			log.Printf("listener.Accept(): %s", err.Error())
// 		}
//
// 		wg.Add(2)
// 		go func() {
// 			defer wg.Done()
// 			defer conn.Close()
//
// 			dec := lineprotocol.NewDecoder(conn)
// 			connctx, cancel := context.WithCancel(context.Background())
// 			defer cancel()
// 			go func() {
// 				defer wg.Done()
// 				select {
// 				case <-connctx.Done():
// 					conn.Close()
// 				case <-ctx.Done():
// 					conn.Close()
// 				}
// 			}()
//
// 			if err := handleLine(dec, "default"); err != nil {
// 				if errors.Is(err, net.ErrClosed) {
// 					return
// 				}
//
// 				log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
// 				errmsg := make([]byte, 128)
// 				errmsg = append(errmsg, `error: `...)
// 				errmsg = append(errmsg, err.Error()...)
// 				errmsg = append(errmsg, '\n')
// 				conn.Write(errmsg)
// 			}
// 		}()
// 	}
//
// 	wg.Wait()
// 	return nil
// }

// Connect to a nats server and subscribe to "updates". This is a blocking
// function. handleLine will be called for each line received via nats.
// Send `true` through the done channel for graceful termination.
func ReceiveNats(conf *(config.NatsConfig),
	ms *MemoryStore,
	workers int,
	ctx context.Context,
) error {
	var opts []nats.Option
	if conf.Username != "" && conf.Password != "" {
		opts = append(opts, nats.UserInfo(conf.Username, conf.Password))
	}

	if conf.Credsfilepath != "" {
		opts = append(opts, nats.UserCredentials(conf.Credsfilepath))
	}

	nc, err := nats.Connect(conf.Address, opts...)
	if err != nil {
		return err
	}
	defer nc.Close()

	var wg sync.WaitGroup
	var subs []*nats.Subscription

	msgs := make(chan *nats.Msg, workers*2)

	for _, sc := range conf.Subscriptions {
		clusterTag := sc.ClusterTag
		var sub *nats.Subscription
		if workers > 1 {
			wg.Add(workers)

			for range workers {
				go func() {
					for m := range msgs {
						dec := lineprotocol.NewDecoderWithBytes(m.Data)
						if err := decodeLine(dec, ms, clusterTag); err != nil {
							log.Printf("error: %s\n", err.Error())
						}
					}

					wg.Done()
				}()
			}

			sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
				msgs <- m
			})
		} else {
			sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
				dec := lineprotocol.NewDecoderWithBytes(m.Data)
				if err := decodeLine(dec, ms, clusterTag); err != nil {
					log.Printf("error: %s\n", err.Error())
				}
			})
		}

		if err != nil {
			return err
		}
		log.Printf("NATS subscription to '%s' on '%s' established\n", sc.SubscribeTo, conf.Address)
		subs = append(subs, sub)
	}

	<-ctx.Done()
	for _, sub := range subs {
		err = sub.Unsubscribe()
		if err != nil {
			log.Printf("NATS unsubscribe failed: %s", err.Error())
		}
	}
	close(msgs)
	wg.Wait()

	nc.Close()
	log.Println("NATS connection closed")
	return nil
}

// Place `prefix` in front of `buf` but if possible,
// do that in place in `buf`.
func reorder(buf, prefix []byte) []byte {
	n := len(prefix)
	m := len(buf)
	if cap(buf) < m+n {
		return append(prefix[:n:n], buf...)
	} else {
		buf = buf[:n+m]
		for i := m - 1; i >= 0; i-- {
			buf[i+n] = buf[i]
		}
		for i := 0; i < n; i++ {
			buf[i] = prefix[i]
		}
		return buf
	}
}

// Decode lines using dec and make write calls to the MemoryStore.
// If a line is missing its cluster tag, use clusterDefault as default.
func decodeLine(dec *lineprotocol.Decoder,
	ms *MemoryStore,
	clusterDefault string,
) error {
	// Reduce allocations in loop:
	t := time.Now()
	metric, metricBuf := Metric{}, make([]byte, 0, 16)
	selector := make([]string, 0, 4)
	typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)

	// Optimize for the case where all lines in a "batch" are about the same
	// cluster and host. By using `WriteToLevel` (level = host), we do not need
	// to take the root- and cluster-level lock as often.
	var lvl *Level = nil
	prevCluster, prevHost := "", ""

	var ok bool
	for dec.Next() {
		rawmeasurement, err := dec.Measurement()
		if err != nil {
			return err
		}

		// Needs to be copied because another call to dec.* would
		// invalidate the returned slice.
		metricBuf = append(metricBuf[:0], rawmeasurement...)

		// The go compiler optimizes map[string(byteslice)] lookups:
		metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]
		if !ok {
			continue
		}

		typeBuf, subTypeBuf := typeBuf[:0], subTypeBuf[:0]
		cluster, host := clusterDefault, ""
		for {
			key, val, err := dec.NextTag()
			if err != nil {
				return err
			}
			if key == nil {
				break
			}

			// The go compiler optimizes string([]byte{...}) == "...":
			switch string(key) {
			case "cluster":
				if string(val) == prevCluster {
					cluster = prevCluster
				} else {
					cluster = string(val)
					lvl = nil
				}
			case "hostname", "host":
				if string(val) == prevHost {
					host = prevHost
				} else {
					host = string(val)
					lvl = nil
				}
			case "type":
				if string(val) == "node" {
					break
				}

				// We cannot be sure that the "type" tag comes before the "type-id" tag:
				if len(typeBuf) == 0 {
					typeBuf = append(typeBuf, val...)
				} else {
					typeBuf = reorder(typeBuf, val)
				}
			case "type-id":
				typeBuf = append(typeBuf, val...)
			case "subtype":
				// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
				if len(subTypeBuf) == 0 {
					subTypeBuf = append(subTypeBuf, val...)
				} else {
					subTypeBuf = reorder(subTypeBuf, val)
				}
			case "stype-id":
				subTypeBuf = append(subTypeBuf, val...)
			default:
				// Ignore unknown tags (cc-metric-collector might send us a unit for example that we do not need)
				// return fmt.Errorf("unknown tag: '%s' (value: '%s')", string(key), string(val))
			}
		}

		// If the cluster or host changed, the lvl was set to nil
		if lvl == nil {
			selector = selector[:2]
			selector[0], selector[1] = cluster, host
			lvl = ms.GetLevel(selector)
			prevCluster, prevHost = cluster, host
		}

		// subtypes:
		selector = selector[:0]
		if len(typeBuf) > 0 {
			selector = append(selector, string(typeBuf)) // <- Allocation :(
			if len(subTypeBuf) > 0 {
				selector = append(selector, string(subTypeBuf))
			}
		}

		for {
			key, val, err := dec.NextField()
			if err != nil {
				return err
			}

			if key == nil {
				break
			}

			if string(key) != "value" {
				return fmt.Errorf("host %s: unknown field: '%s' (value: %#v)", host, string(key), val)
			}

			if val.Kind() == lineprotocol.Float {
				metric.Value = schema.Float(val.FloatV())
			} else if val.Kind() == lineprotocol.Int {
				metric.Value = schema.Float(val.IntV())
			} else if val.Kind() == lineprotocol.Uint {
				metric.Value = schema.Float(val.UintV())
			} else {
				return fmt.Errorf("host %s: unsupported value type in message: %s", host, val.Kind().String())
			}
		}

		if t, err = dec.Time(lineprotocol.Second, t); err != nil {
			t = time.Now()
			if t, err = dec.Time(lineprotocol.Millisecond, t); err != nil {
				t = time.Now()
				if t, err = dec.Time(lineprotocol.Microsecond, t); err != nil {
					t = time.Now()
					if t, err = dec.Time(lineprotocol.Nanosecond, t); err != nil {
						return fmt.Errorf("host %s: timestamp: %#v with error: %#v", host, t, err.Error())
					}
				}
			}
		}

		if err != nil {
			return fmt.Errorf("host %s: timestamp: %#v with error: %#v", host, t, err.Error())
		}

		time := t.Unix()

		if config.MetricStoreKeys.Checkpoints.FileFormat != "json" {
			avro.LineProtocolMessages <- &avro.AvroStruct{
				MetricName: string(metricBuf),
				Cluster:    cluster,
				Node:       host,
				Selector:   append([]string{}, selector...),
				Value:      metric.Value,
				Timestamp:  time,
			}
		}

		if err := ms.WriteToLevel(lvl, selector, time, []Metric{metric}); err != nil {
			return err
		}
	}
	return nil
}
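Two details above are easy to misread, so here is a hedged sketch (as if written inside this package; the line and all values are invented). First, the kind of line decodeLine consumes; second, what reorder does when "type-id" arrives before "type":

// An invented line in InfluxDB line protocol, as cc-metric-collector sends it:
//   cpu_load,cluster=emmy,hostname=host123,type=hwthread,type-id=7 value=0.42 1700000000
// decodeLine maps it to cluster "emmy", host "host123", selector {"hwthread7"}.

// reorder in effect: "type-id" was appended first ("7"), then the later
// "type" tag ("hwthread") is prepended, in place if capacity allows.
buf := make([]byte, 0, 16)
buf = append(buf, '7')
buf = reorder(buf, []byte("hwthread"))
// string(buf) == "hwthread7", with no extra allocation here.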
446 internal/memorystore/memorystore.go Normal file
@@ -0,0 +1,446 @@
package memorystore

import (
	"context"
	"errors"
	"log"
	"os"
	"os/signal"
	"runtime"
	"sync"
	"syscall"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/avro"
	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-lib/resampler"
	"github.com/ClusterCockpit/cc-lib/runtimeEnv"
	"github.com/ClusterCockpit/cc-lib/schema"
	"github.com/ClusterCockpit/cc-lib/util"
)

var (
	singleton  sync.Once
	msInstance *MemoryStore
)

var Clusters = make([]string, 0)

var NumWorkers int = 4

func init() {
	maxWorkers := 10
	NumWorkers = runtime.NumCPU()/2 + 1
	if NumWorkers > maxWorkers {
		NumWorkers = maxWorkers
	}
}

type Metric struct {
	Name         string
	Value        schema.Float
	MetricConfig config.MetricConfig
}

type MemoryStore struct {
	Metrics map[string]config.MetricConfig
	root    Level
}

func Init(wg *sync.WaitGroup) {
	startupTime := time.Now()

	// Pass the config.MetricStoreKeys
	InitMetrics(config.Metrics)

	ms := GetMemoryStore()

	d, err := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Restore)
	if err != nil {
		log.Fatal(err)
	}

	restoreFrom := startupTime.Add(-d)
	log.Printf("[METRICSTORE]> Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339))
	files, err := ms.FromCheckpointFiles(config.MetricStoreKeys.Checkpoints.RootDir, restoreFrom.Unix())
	loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB
	if err != nil {
		log.Fatalf("[METRICSTORE]> Loading checkpoints failed: %s\n", err.Error())
	} else {
		log.Printf("[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds())
	}

	// Try to use less memory by forcing a GC run here and then
	// lowering the target percentage. The default of 100 means
	// that only once the ratio of new allocations exceeds the
	// previously active heap, a GC is triggered.
	// Forcing a GC here will set the "previously active heap"
	// to a minimum.
	runtime.GC()

	ctx, shutdown := context.WithCancel(context.Background())

	wg.Add(4)

	Retention(wg, ctx)
	Checkpointing(wg, ctx)
	Archiving(wg, ctx)
	avro.DataStaging(wg, ctx)

	wg.Add(1)
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		defer wg.Done()
		<-sigs
		runtimeEnv.SystemdNotifiy(false, "[METRICSTORE]> Shutting down ...")
		shutdown()
	}()

	if config.MetricStoreKeys.Nats != nil {
		for _, natsConf := range config.MetricStoreKeys.Nats {
			// TODO: When multiple nats configs share a URL, do a single connect.
			wg.Add(1)
			nc := natsConf
			go func() {
				// err := ReceiveNats(conf.Nats, decodeLine, runtime.NumCPU()-1, ctx)
				err := ReceiveNats(nc, ms, 1, ctx)
				if err != nil {
					log.Fatal(err)
				}
				wg.Done()
			}()
		}
	}
}

// InitMetrics creates a new, initialized instance of a MemoryStore.
// It will panic if values in the metric configurations are invalid.
func InitMetrics(metrics map[string]config.MetricConfig) {
	singleton.Do(func() {
		offset := 0
		for key, cfg := range metrics {
			if cfg.Frequency == 0 {
				panic("[METRICSTORE]> invalid frequency")
			}

			metrics[key] = config.MetricConfig{
				Frequency:   cfg.Frequency,
				Aggregation: cfg.Aggregation,
				Offset:      offset,
			}
			offset += 1
		}

		msInstance = &MemoryStore{
			root: Level{
				metrics:  make([]*buffer, len(metrics)),
				children: make(map[string]*Level),
			},
			Metrics: metrics,
		}
	})
}

func GetMemoryStore() *MemoryStore {
	if msInstance == nil {
		log.Fatalf("[METRICSTORE]> MemoryStore not initialized!")
	}

	return msInstance
}

func Shutdown() {
	log.Printf("[METRICSTORE]> Writing to '%s'...\n", config.MetricStoreKeys.Checkpoints.RootDir)
	var files int
	var err error

	ms := GetMemoryStore()

	if config.MetricStoreKeys.Checkpoints.FileFormat == "json" {
		files, err = ms.ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
	} else {
		files, err = avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, true)
		close(avro.LineProtocolMessages)
	}

	if err != nil {
		log.Printf("[METRICSTORE]> Writing checkpoint failed: %s\n", err.Error())
	}
	log.Printf("[METRICSTORE]> Done! (%d files written)\n", files)

	// ms.PrintHeirarchy()
}

// func (m *MemoryStore) PrintHeirarchy() {
// 	m.root.lock.Lock()
// 	defer m.root.lock.Unlock()
//
// 	fmt.Printf("Root : \n")
//
// 	for lvl1, sel1 := range m.root.children {
// 		fmt.Printf("\t%s\n", lvl1)
// 		for lvl2, sel2 := range sel1.children {
// 			fmt.Printf("\t\t%s\n", lvl2)
// 			if lvl1 == "fritz" && lvl2 == "f0201" {
//
// 				for name, met := range m.Metrics {
// 					mt := sel2.metrics[met.Offset]
//
// 					fmt.Printf("\t\t\t\t%s\n", name)
// 					fmt.Printf("\t\t\t\t")
//
// 					for mt != nil {
// 						// if name == "cpu_load" {
// 						fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
// 						// }
// 						mt = mt.prev
// 					}
// 					fmt.Printf("\n")
//
// 				}
// 			}
// 			for lvl3, sel3 := range sel2.children {
// 				if lvl1 == "fritz" && lvl2 == "f0201" && lvl3 == "hwthread70" {
//
// 					fmt.Printf("\t\t\t\t\t%s\n", lvl3)
//
// 					for name, met := range m.Metrics {
// 						mt := sel3.metrics[met.Offset]
//
// 						fmt.Printf("\t\t\t\t\t\t%s\n", name)
//
// 						fmt.Printf("\t\t\t\t\t\t")
//
// 						for mt != nil {
// 							// if name == "clock" {
// 							fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
//
// 							mt = mt.prev
// 						}
// 						fmt.Printf("\n")
//
// 					}
//
// 					// for i, _ := range sel3.metrics {
// 					// 	fmt.Printf("\t\t\t\t\t%s\n", getName(configmetrics, i))
// 					// }
// 				}
// 			}
// 		}
// 	}
//
// }

func getName(m *MemoryStore, i int) string {
	for key, val := range m.Metrics {
		if val.Offset == i {
			return key
		}
	}
	return ""
}

func Retention(wg *sync.WaitGroup, ctx context.Context) {
	ms := GetMemoryStore()

	go func() {
		defer wg.Done()
		d, err := time.ParseDuration(config.MetricStoreKeys.RetentionInMemory)
		if err != nil {
			log.Fatal(err)
		}
		if d <= 0 {
			return
		}

		ticks := func() <-chan time.Time {
			d := d / 2
			if d <= 0 {
				return nil
			}
			return time.NewTicker(d).C
		}()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticks:
				t := time.Now().Add(-d)
				log.Printf("[METRICSTORE]> start freeing buffers (older than %s)...\n", t.Format(time.RFC3339))
				freed, err := ms.Free(nil, t.Unix())
				if err != nil {
					log.Printf("[METRICSTORE]> freeing up buffers failed: %s\n", err.Error())
				} else {
					log.Printf("[METRICSTORE]> done: %d buffers freed\n", freed)
				}
			}
		}
	}()
}

// Write all values in `metrics` to the level specified by `selector` for time `ts`.
// Look at `findLevelOrCreate` for how selectors work.
func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error {
	var ok bool
	for i, metric := range metrics {
		if metric.MetricConfig.Frequency == 0 {
			metric.MetricConfig, ok = m.Metrics[metric.Name]
			if !ok {
				metric.MetricConfig.Frequency = 0
			}
			metrics[i] = metric
		}
	}

	return m.WriteToLevel(&m.root, selector, ts, metrics)
}

func (m *MemoryStore) GetLevel(selector []string) *Level {
	return m.root.findLevelOrCreate(selector, len(m.Metrics))
}

// Assumes that `minfo` in `metrics` is filled in!
func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error {
	l = l.findLevelOrCreate(selector, len(m.Metrics))
	l.lock.Lock()
	defer l.lock.Unlock()

	for _, metric := range metrics {
		if metric.MetricConfig.Frequency == 0 {
			continue
		}

		b := l.metrics[metric.MetricConfig.Offset]
		if b == nil {
			// First write to this metric and level
			b = newBuffer(ts, metric.MetricConfig.Frequency)
			l.metrics[metric.MetricConfig.Offset] = b
		}

		nb, err := b.write(ts, metric.Value)
		if err != nil {
			return err
		}

		// Last write created a new buffer...
		if b != nb {
			l.metrics[metric.MetricConfig.Offset] = nb
		}
	}
	return nil
}

// Read returns all values for metric `metric` from `from` to `to` for the selected level(s).
// If the level does not hold the metric itself, the data will be aggregated recursively from the children.
// The second and third return value are the actual from/to of the data. Those can be different from
// the range asked for if no data was available.
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {
	if from > to {
		return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range")
	}

	minfo, ok := m.Metrics[metric]
	if !ok {
		return nil, 0, 0, 0, errors.New("[METRICSTORE]> unknown metric: " + metric)
	}

	n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)

	err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error {
		cdata, cfrom, cto, err := b.read(from, to, data)
		if err != nil {
			return err
		}

		if n == 0 {
			from, to = cfrom, cto
		} else if from != cfrom || to != cto || len(data) != len(cdata) {
			missingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)
			if missingfront != 0 {
				return ErrDataDoesNotAlign
			}

			newlen := len(cdata) - missingback
			if newlen < 1 {
				return ErrDataDoesNotAlign
			}
			cdata = cdata[0:newlen]
			if len(cdata) != len(data) {
				return ErrDataDoesNotAlign
			}

			from, to = cfrom, cto
		}

		data = cdata
		n += 1
		return nil
	})

	if err != nil {
		return nil, 0, 0, 0, err
	} else if n == 0 {
		return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found")
	} else if n > 1 {
		if minfo.Aggregation == config.AvgAggregation {
			normalize := 1. / schema.Float(n)
			for i := 0; i < len(data); i++ {
				data[i] *= normalize
			}
		} else if minfo.Aggregation != config.SumAggregation {
			return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid aggregation")
		}
	}

	data, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)
	if err != nil {
		return nil, 0, 0, 0, err
	}

	return data, from, to, resolution, nil
}

// Free releases all buffers for the selected level and all its children that
// contain only values older than `t`.
func (m *MemoryStore) Free(selector []string, t int64) (int, error) {
	return m.GetLevel(selector).free(t)
}

func (m *MemoryStore) FreeAll() error {
	for k := range m.root.children {
		delete(m.root.children, k)
	}

	return nil
}

func (m *MemoryStore) SizeInBytes() int64 {
	return m.root.sizeInBytes()
}

// ListChildren returns, given a selector, a list of all children of the selected level.
func (m *MemoryStore) ListChildren(selector []string) []string {
	lvl := &m.root
	for lvl != nil && len(selector) != 0 {
		lvl.lock.RLock()
		next := lvl.children[selector[0]]
		lvl.lock.RUnlock()
		lvl = next
		selector = selector[1:]
	}

	if lvl == nil {
		return nil
	}

	lvl.lock.RLock()
	defer lvl.lock.RUnlock()

	children := make([]string, 0, len(lvl.children))
	for child := range lvl.children {
		children = append(children, child)
	}

	return children
}
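A minimal write/read round trip through the store; the metric name, frequency, and timestamps are invented, while the field names follow the code above:

package main

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
	"github.com/ClusterCockpit/cc-lib/util"
)

func main() {
	// Register one metric with a 10s frequency (offsets are assigned internally).
	memorystore.InitMetrics(map[string]config.MetricConfig{
		"flops": {Frequency: 10, Aggregation: config.SumAggregation},
	})
	ms := memorystore.GetMemoryStore()

	// Write one node-level value; the cluster/host levels are created on demand.
	_ = ms.Write([]string{"emmy", "host123"}, 1700000000, []memorystore.Metric{
		{Name: "flops", Value: 42},
	})

	// Read it back; with a single matching buffer no aggregation happens.
	data, from, to, res, err := ms.Read(
		util.Selector{{String: "emmy"}, {String: "host123"}},
		"flops", 1700000000, 1700000060, 10)
	fmt.Println(data, from, to, res, err)
}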
120 internal/memorystore/stats.go Normal file
@@ -0,0 +1,120 @@
package memorystore

import (
	"errors"
	"math"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-lib/util"
)

type Stats struct {
	Samples int
	Avg     util.Float
	Min     util.Float
	Max     util.Float
}

func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) {
	if from < b.start {
		if b.prev != nil {
			return b.prev.stats(from, to)
		}
		from = b.start
	}

	// TODO: Check if b.closed and if so and the full buffer is queried,
	// use b.statistics instead of iterating over the buffer.

	samples := 0
	sum, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32

	var t int64
	for t = from; t < to; t += b.frequency {
		idx := int((t - b.start) / b.frequency)
		if idx >= cap(b.data) {
			b = b.next
			if b == nil {
				break
			}
			idx = 0
		}

		if t < b.start || idx >= len(b.data) {
			continue
		}

		xf := float64(b.data[idx])
		if math.IsNaN(xf) {
			continue
		}

		samples += 1
		sum += xf
		min = math.Min(min, xf)
		max = math.Max(max, xf)
	}

	return Stats{
		Samples: samples,
		Avg:     util.Float(sum) / util.Float(samples),
		Min:     util.Float(min),
		Max:     util.Float(max),
	}, from, t, nil
}

// Stats returns statistics for the requested metric on the selected node/level.
// Data is aggregated to the selected level the same way as in `MemoryStore.Read`.
// If `Stats.Samples` is zero, the statistics should not be considered as valid.
func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int64) (*Stats, int64, int64, error) {
	if from > to {
		return nil, 0, 0, errors.New("invalid time range")
	}

	minfo, ok := m.Metrics[metric]
	if !ok {
		return nil, 0, 0, errors.New("unknown metric: " + metric)
	}

	n, samples := 0, 0
	avg, min, max := util.Float(0), math.MaxFloat32, -math.MaxFloat32
	err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error {
		stats, cfrom, cto, err := b.stats(from, to)
		if err != nil {
			return err
		}

		if n == 0 {
			from, to = cfrom, cto
		} else if from != cfrom || to != cto {
			return ErrDataDoesNotAlign
		}

		samples += stats.Samples
		avg += stats.Avg
		min = math.Min(min, float64(stats.Min))
		max = math.Max(max, float64(stats.Max))
		n += 1
		return nil
	})
	if err != nil {
		return nil, 0, 0, err
	}

	if n == 0 {
		return nil, 0, 0, ErrNoData
	}

	if minfo.Aggregation == config.AvgAggregation {
		avg /= util.Float(n)
	} else if n > 1 && minfo.Aggregation != config.SumAggregation {
		return nil, 0, 0, errors.New("invalid aggregation")
	}

	return &Stats{
		Samples: samples,
		Avg:     avg,
		Min:     util.Float(min),
		Max:     util.Float(max),
	}, from, to, nil
}
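And the matching statistics query, aggregated across all hosts of a cluster; again a sketch with invented values, reusing the store `ms` from the previous example:

// {Any: true} fans out over all hosts of the "emmy" cluster.
stats, from, to, err := ms.Stats(
	util.Selector{{String: "emmy"}, {Any: true}},
	"flops", 1700000000, 1700000600)
if err == nil && stats.Samples > 0 {
	fmt.Printf("%d samples in [%d,%d]: avg=%v min=%v max=%v\n",
		stats.Samples, from, to, stats.Avg, stats.Min, stats.Max)
}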
@@ -91,14 +91,14 @@ func LoadData(job *schema.Job,
 	// Pass the resolution from frontend here.
 	for _, v := range jd {
 		for _, v_ := range v {
-			timestep := 0
+			timestep := int64(0)
 			for i := 0; i < len(v_.Series); i += 1 {
-				v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
+				v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, int64(v_.Timestep), int64(resolution))
 				if err != nil {
 					return err, 0, 0
 				}
 			}
-			v_.Timestep = timestep
+			v_.Timestep = int(timestep)
 		}
 	}
@@ -5,23 +5,22 @@
 package metricdata

 import (
-	"bufio"
-	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
-	"net/http"
 	"sort"
 	"strconv"
 	"strings"
 	"time"

 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
+	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
 	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
 	"github.com/ClusterCockpit/cc-lib/schema"
 )

+// Bloat Code
 type CCMetricStoreConfig struct {
 	Kind string `json:"kind"`
 	Url  string `json:"url"`
@@ -33,141 +32,16 @@ type CCMetricStoreConfig struct {
 	Renamings map[string]string `json:"metricRenamings"`
 }

+// Bloat Code
 type CCMetricStore struct {
-	here2there    map[string]string
-	there2here    map[string]string
-	client        http.Client
-	jwt           string
-	url           string
-	queryEndpoint string
-}
-
-type ApiQueryRequest struct {
-	Cluster     string     `json:"cluster"`
-	Queries     []ApiQuery `json:"queries"`
-	ForAllNodes []string   `json:"for-all-nodes"`
-	From        int64      `json:"from"`
-	To          int64      `json:"to"`
-	WithStats   bool       `json:"with-stats"`
-	WithData    bool       `json:"with-data"`
-}
-
-type ApiQuery struct {
-	Type       *string  `json:"type,omitempty"`
-	SubType    *string  `json:"subtype,omitempty"`
-	Metric     string   `json:"metric"`
-	Hostname   string   `json:"host"`
-	Resolution int      `json:"resolution"`
-	TypeIds    []string `json:"type-ids,omitempty"`
-	SubTypeIds []string `json:"subtype-ids,omitempty"`
-	Aggregate  bool     `json:"aggreg"`
-}
-
-type ApiQueryResponse struct {
-	Queries []ApiQuery        `json:"queries,omitempty"`
-	Results [][]ApiMetricData `json:"results"`
-}
-
-type ApiMetricData struct {
-	Error      *string        `json:"error"`
-	Data       []schema.Float `json:"data"`
-	From       int64          `json:"from"`
-	To         int64          `json:"to"`
-	Resolution int            `json:"resolution"`
-	Avg        schema.Float   `json:"avg"`
-	Min        schema.Float   `json:"min"`
-	Max        schema.Float   `json:"max"`
 }

+// Bloat Code
 func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
-	var config CCMetricStoreConfig
-	if err := json.Unmarshal(rawConfig, &config); err != nil {
-		cclog.Warn("Error while unmarshaling raw json config")
-		return err
-	}
-
-	ccms.url = config.Url
-	ccms.queryEndpoint = fmt.Sprintf("%s/api/query", config.Url)
-	ccms.jwt = config.Token
-	ccms.client = http.Client{
-		Timeout: 10 * time.Second,
-	}
-
-	if config.Renamings != nil {
-		ccms.here2there = config.Renamings
-		ccms.there2here = make(map[string]string, len(config.Renamings))
-		for k, v := range ccms.here2there {
-			ccms.there2here[v] = k
-		}
-	} else {
-		ccms.here2there = make(map[string]string)
-		ccms.there2here = make(map[string]string)
-	}
-
 	return nil
 }

-func (ccms *CCMetricStore) toRemoteName(metric string) string {
-	if renamed, ok := ccms.here2there[metric]; ok {
-		return renamed
-	}
-
-	return metric
-}
-
-func (ccms *CCMetricStore) toLocalName(metric string) string {
-	if renamed, ok := ccms.there2here[metric]; ok {
-		return renamed
-	}
-
-	return metric
-}
-
-func (ccms *CCMetricStore) doRequest(
-	ctx context.Context,
-	body *ApiQueryRequest,
-) (*ApiQueryResponse, error) {
-	buf := &bytes.Buffer{}
-	if err := json.NewEncoder(buf).Encode(body); err != nil {
-		cclog.Errorf("Error while encoding request body: %s", err.Error())
-		return nil, err
-	}
-
-	req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf)
-	if err != nil {
-		cclog.Errorf("Error while building request body: %s", err.Error())
-		return nil, err
-	}
-	if ccms.jwt != "" {
-		req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
-	}
-
-	// versioning the cc-metric-store query API.
-	// v2 = data with resampling
-	// v1 = data without resampling
-	q := req.URL.Query()
-	q.Add("version", "v2")
-	req.URL.RawQuery = q.Encode()
-
-	res, err := ccms.client.Do(req)
-	if err != nil {
-		cclog.Errorf("Error while performing request: %s", err.Error())
-		return nil, err
-	}
-
-	if res.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("'%s': HTTP Status: %s", ccms.queryEndpoint, res.Status)
-	}
-
-	var resBody ApiQueryResponse
-	if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil {
-		cclog.Errorf("Error while decoding result body: %s", err.Error())
-		return nil, err
-	}
-
-	return &resBody, nil
-}
-
 func (ccms *CCMetricStore) LoadData(
 	job *schema.Job,
 	metrics []string,
@@ -175,13 +49,13 @@ func (ccms *CCMetricStore) LoadData(
 	ctx context.Context,
 	resolution int,
 ) (schema.JobData, error) {
-	queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution)
+	queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, int64(resolution))
 	if err != nil {
 		cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error())
 		return nil, err
 	}

-	req := ApiQueryRequest{
+	req := memorystore.ApiQueryRequest{
 		Cluster: job.Cluster,
 		From:    job.StartTime,
 		To:      job.StartTime + int64(job.Duration),
@@ -190,9 +64,9 @@ func (ccms *CCMetricStore) LoadData(
 		WithData: true,
 	}

-	resBody, err := ccms.doRequest(ctx, &req)
+	resBody, err := memorystore.FetchData(req)
 	if err != nil {
-		cclog.Errorf("Error while performing request: %s", err.Error())
+		cclog.Errorf("Error while fetching data: %s", err.Error())
 		return nil, err
 	}

@@ -200,7 +74,7 @@ func (ccms *CCMetricStore) LoadData(
 	jobData := make(schema.JobData)
 	for i, row := range resBody.Results {
 		query := req.Queries[i]
-		metric := ccms.toLocalName(query.Metric)
+		metric := query.Metric
 		scope := assignedScope[i]
 		mc := archive.GetMetricConfig(job.Cluster, metric)
 		if _, ok := jobData[metric]; !ok {
@@ -209,7 +83,7 @@ func (ccms *CCMetricStore) LoadData(

 		res := mc.Timestep
 		if len(row) > 0 {
-			res = row[0].Resolution
+			res = int(row[0].Resolution)
 		}

 		jobMetric, ok := jobData[metric][scope]
@@ -282,9 +156,9 @@ func (ccms *CCMetricStore) buildQueries(
 	job *schema.Job,
 	metrics []string,
 	scopes []schema.MetricScope,
-	resolution int,
+	resolution int64,
-) ([]ApiQuery, []schema.MetricScope, error) {
+) ([]memorystore.ApiQuery, []schema.MetricScope, error) {
-	queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
+	queries := make([]memorystore.ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
 	assignedScope := []schema.MetricScope{}

 	subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster)
@@ -294,7 +168,6 @@ func (ccms *CCMetricStore) buildQueries(
 	topology := subcluster.Topology

 	for _, metric := range metrics {
-		remoteName := ccms.toRemoteName(metric)
 		mc := archive.GetMetricConfig(job.Cluster, metric)
 		if mc == nil {
 			// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
@@ -306,7 +179,7 @@ func (ccms *CCMetricStore) buildQueries(
 		if len(mc.SubClusters) != 0 {
 			isRemoved := false
 			for _, scConfig := range mc.SubClusters {
-				if scConfig.Name == job.SubCluster && scConfig.Remove == true {
+				if scConfig.Name == job.SubCluster && scConfig.Remove {
 					isRemoved = true
 					break
 				}
@@ -347,8 +220,8 @@ func (ccms *CCMetricStore) buildQueries(
 				continue
 			}

-			queries = append(queries, ApiQuery{
-				Metric:    remoteName,
+			queries = append(queries, memorystore.ApiQuery{
+				Metric:    metric,
 				Hostname:  host.Hostname,
 				Aggregate: false,
 				Type:      &acceleratorString,
@@ -365,8 +238,8 @@ func (ccms *CCMetricStore) buildQueries(
 				continue
 			}

-			queries = append(queries, ApiQuery{
-				Metric:    remoteName,
+			queries = append(queries, memorystore.ApiQuery{
+				Metric:    metric,
 				Hostname:  host.Hostname,
 				Aggregate: true,
 				Type:      &acceleratorString,
@@ -379,8 +252,8 @@ func (ccms *CCMetricStore) buildQueries(

 			// HWThread -> HWThread
 			if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
-				queries = append(queries, ApiQuery{
-					Metric:    remoteName,
+				queries = append(queries, memorystore.ApiQuery{
+					Metric:    metric,
 					Hostname:  host.Hostname,
 					Aggregate: false,
 					Type:      &hwthreadString,
@@ -395,8 +268,8 @@ func (ccms *CCMetricStore) buildQueries(
 			if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {
 				cores, _ := topology.GetCoresFromHWThreads(hwthreads)
 				for _, core := range cores {
-					queries = append(queries, ApiQuery{
-						Metric:    remoteName,
+					queries = append(queries, memorystore.ApiQuery{
+						Metric:    metric,
 						Hostname:  host.Hostname,
 						Aggregate: true,
 						Type:      &hwthreadString,
@@ -412,8 +285,8 @@ func (ccms *CCMetricStore) buildQueries(
 			if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {
 				sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
 				for _, socket := range sockets {
-					queries = append(queries, ApiQuery{
-						Metric:    remoteName,
+					queries = append(queries, memorystore.ApiQuery{
+						Metric:    metric,
 						Hostname:  host.Hostname,
 						Aggregate: true,
 						Type:      &hwthreadString,
@@ -427,8 +300,8 @@ func (ccms *CCMetricStore) buildQueries(

 			// HWThread -> Node
 			if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
-				queries = append(queries, ApiQuery{
-					Metric:    remoteName,
+				queries = append(queries, memorystore.ApiQuery{
+					Metric:    metric,
 					Hostname:  host.Hostname,
 					Aggregate: true,
 					Type:      &hwthreadString,
@@ -442,8 +315,8 @@ func (ccms *CCMetricStore) buildQueries(
 			// Core -> Core
 			if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
 				cores, _ := topology.GetCoresFromHWThreads(hwthreads)
-				queries = append(queries, ApiQuery{
-					Metric:    remoteName,
+				queries = append(queries, memorystore.ApiQuery{
+					Metric:    metric,
 					Hostname:  host.Hostname,
 					Aggregate: false,
 					Type:      &coreString,
@@ -458,8 +331,8 @@ func (ccms *CCMetricStore) buildQueries(
 			if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
 				sockets, _ := topology.GetSocketsFromCores(hwthreads)
 				for _, socket := range sockets {
-					queries = append(queries, ApiQuery{
-						Metric:    remoteName,
+					queries = append(queries, memorystore.ApiQuery{
+						Metric:    metric,
 						Hostname:  host.Hostname,
 						Aggregate: true,
 						Type:      &coreString,
@@ -474,8 +347,8 @@ func (ccms *CCMetricStore) buildQueries(
 			// Core -> Node
 			if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
 				cores, _ := topology.GetCoresFromHWThreads(hwthreads)
-				queries = append(queries, ApiQuery{
-					Metric:    remoteName,
+				queries = append(queries, memorystore.ApiQuery{
+					Metric:    metric,
 					Hostname:  host.Hostname,
 					Aggregate: true,
 					Type:      &coreString,
@@ -489,8 +362,8 @@ func (ccms *CCMetricStore) buildQueries(
 			// MemoryDomain -> MemoryDomain
 			if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
 				sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
-				queries = append(queries, ApiQuery{
-					Metric:    remoteName,
+				queries = append(queries, memorystore.ApiQuery{
+					Metric:    metric,
 					Hostname:  host.Hostname,
 					Aggregate: false,
 					Type:      &memoryDomainString,
@@ -504,8 +377,8 @@ func (ccms *CCMetricStore) buildQueries(
 			// MemoryDomain -> Node
 			if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
 				sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
-				queries = append(queries, ApiQuery{
-					Metric:    remoteName,
+				queries = append(queries, memorystore.ApiQuery{
+					Metric:    metric,
 					Hostname:  host.Hostname,
 					Aggregate: true,
 					Type:      &memoryDomainString,
@@ -519,8 +392,8 @@ func (ccms *CCMetricStore) buildQueries(
 			// Socket -> Socket
 			if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
 				sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
-				queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: host.Hostname,
|
Hostname: host.Hostname,
|
||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &socketString,
|
Type: &socketString,
|
||||||
@@ -534,8 +407,8 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
// Socket -> Node
|
// Socket -> Node
|
||||||
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
|
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
|
||||||
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
|
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: host.Hostname,
|
Hostname: host.Hostname,
|
||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &socketString,
|
Type: &socketString,
|
||||||
@@ -548,8 +421,8 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
|
|
||||||
// Node -> Node
|
// Node -> Node
|
||||||
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
|
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: host.Hostname,
|
Hostname: host.Hostname,
|
||||||
Resolution: resolution,
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
@@ -576,7 +449,7 @@ func (ccms *CCMetricStore) LoadStats(
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
req := ApiQueryRequest{
|
req := memorystore.ApiQueryRequest{
|
||||||
Cluster: job.Cluster,
|
Cluster: job.Cluster,
|
||||||
From: job.StartTime,
|
From: job.StartTime,
|
||||||
To: job.StartTime + int64(job.Duration),
|
To: job.StartTime + int64(job.Duration),
|
||||||
@@ -585,16 +458,16 @@ func (ccms *CCMetricStore) LoadStats(
|
|||||||
WithData: false,
|
WithData: false,
|
||||||
}
|
}
|
||||||
|
|
||||||
resBody, err := ccms.doRequest(ctx, &req)
|
resBody, err := memorystore.FetchData(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("Error while performing request: %s", err.Error())
|
cclog.Errorf("Error while fetching data : %s", err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
stats := make(map[string]map[string]schema.MetricStatistics, len(metrics))
|
stats := make(map[string]map[string]schema.MetricStatistics, len(metrics))
|
||||||
for i, res := range resBody.Results {
|
for i, res := range resBody.Results {
|
||||||
query := req.Queries[i]
|
query := req.Queries[i]
|
||||||
metric := ccms.toLocalName(query.Metric)
|
metric := query.Metric
|
||||||
data := res[0]
|
data := res[0]
|
||||||
if data.Error != nil {
|
if data.Error != nil {
|
||||||
cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
|
cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
|
||||||
@@ -635,7 +508,7 @@ func (ccms *CCMetricStore) LoadScopedStats(
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
req := ApiQueryRequest{
|
req := memorystore.ApiQueryRequest{
|
||||||
Cluster: job.Cluster,
|
Cluster: job.Cluster,
|
||||||
From: job.StartTime,
|
From: job.StartTime,
|
||||||
To: job.StartTime + int64(job.Duration),
|
To: job.StartTime + int64(job.Duration),
|
||||||
@@ -644,9 +517,9 @@ func (ccms *CCMetricStore) LoadScopedStats(
|
|||||||
WithData: false,
|
WithData: false,
|
||||||
}
|
}
|
||||||
|
|
||||||
resBody, err := ccms.doRequest(ctx, &req)
|
resBody, err := memorystore.FetchData(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("Error while performing request: %s", err.Error())
|
cclog.Errorf("Error while fetching data : %s", err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -655,7 +528,7 @@ func (ccms *CCMetricStore) LoadScopedStats(
|
|||||||
|
|
||||||
for i, row := range resBody.Results {
|
for i, row := range resBody.Results {
|
||||||
query := req.Queries[i]
|
query := req.Queries[i]
|
||||||
metric := ccms.toLocalName(query.Metric)
|
metric := query.Metric
|
||||||
scope := assignedScope[i]
|
scope := assignedScope[i]
|
||||||
|
|
||||||
if _, ok := scopedJobStats[metric]; !ok {
|
if _, ok := scopedJobStats[metric]; !ok {
|
||||||
@@ -721,7 +594,7 @@ func (ccms *CCMetricStore) LoadNodeData(
|
|||||||
from, to time.Time,
|
from, to time.Time,
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
) (map[string]map[string][]*schema.JobMetric, error) {
|
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||||
req := ApiQueryRequest{
|
req := memorystore.ApiQueryRequest{
|
||||||
Cluster: cluster,
|
Cluster: cluster,
|
||||||
From: from.Unix(),
|
From: from.Unix(),
|
||||||
To: to.Unix(),
|
To: to.Unix(),
|
||||||
@@ -730,38 +603,36 @@ func (ccms *CCMetricStore) LoadNodeData(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if nodes == nil {
|
if nodes == nil {
|
||||||
for _, metric := range metrics {
|
req.ForAllNodes = append(req.ForAllNodes, metrics...)
|
||||||
req.ForAllNodes = append(req.ForAllNodes, ccms.toRemoteName(metric))
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
for _, node := range nodes {
|
for _, node := range nodes {
|
||||||
for _, metric := range metrics {
|
for _, metric := range metrics {
|
||||||
req.Queries = append(req.Queries, ApiQuery{
|
req.Queries = append(req.Queries, memorystore.ApiQuery{
|
||||||
Hostname: node,
|
Hostname: node,
|
||||||
Metric: ccms.toRemoteName(metric),
|
Metric: metric,
|
||||||
Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution
|
Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
resBody, err := ccms.doRequest(ctx, &req)
|
resBody, err := memorystore.FetchData(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("Error while performing request: %s", err.Error())
|
cclog.Errorf("Error while fetching data : %s", err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var errors []string
|
var errors []string
|
||||||
data := make(map[string]map[string][]*schema.JobMetric)
|
data := make(map[string]map[string][]*schema.JobMetric)
|
||||||
for i, res := range resBody.Results {
|
for i, res := range resBody.Results {
|
||||||
var query ApiQuery
|
var query memorystore.ApiQuery
|
||||||
if resBody.Queries != nil {
|
if resBody.Queries != nil {
|
||||||
query = resBody.Queries[i]
|
query = resBody.Queries[i]
|
||||||
} else {
|
} else {
|
||||||
query = req.Queries[i]
|
query = req.Queries[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
metric := ccms.toLocalName(query.Metric)
|
metric := query.Metric
|
||||||
qdata := res[0]
|
qdata := res[0]
|
||||||
if qdata.Error != nil {
|
if qdata.Error != nil {
|
||||||
/* Build list for "partial errors", if any */
|
/* Build list for "partial errors", if any */
|
||||||
@@ -861,13 +732,13 @@ func (ccms *CCMetricStore) LoadNodeListData(
|
|||||||
|
|
||||||
// Note: Order of node data is not guaranteed after this point, but contents match page and filter criteria
|
// Note: Order of node data is not guaranteed after this point, but contents match page and filter criteria
|
||||||
|
|
||||||
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution)
|
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error())
|
cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error())
|
||||||
return nil, totalNodes, hasNextPage, err
|
return nil, totalNodes, hasNextPage, err
|
||||||
}
|
}
|
||||||
|
|
||||||
req := ApiQueryRequest{
|
req := memorystore.ApiQueryRequest{
|
||||||
Cluster: cluster,
|
Cluster: cluster,
|
||||||
Queries: queries,
|
Queries: queries,
|
||||||
From: from.Unix(),
|
From: from.Unix(),
|
||||||
@@ -876,29 +747,29 @@ func (ccms *CCMetricStore) LoadNodeListData(
|
|||||||
WithData: true,
|
WithData: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
resBody, err := ccms.doRequest(ctx, &req)
|
resBody, err := memorystore.FetchData(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("Error while performing request: %s", err.Error())
|
cclog.Errorf("Error while fetching data : %s", err.Error())
|
||||||
return nil, totalNodes, hasNextPage, err
|
return nil, totalNodes, hasNextPage, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var errors []string
|
var errors []string
|
||||||
data := make(map[string]schema.JobData)
|
data := make(map[string]schema.JobData)
|
||||||
for i, row := range resBody.Results {
|
for i, row := range resBody.Results {
|
||||||
var query ApiQuery
|
var query memorystore.ApiQuery
|
||||||
if resBody.Queries != nil {
|
if resBody.Queries != nil {
|
||||||
query = resBody.Queries[i]
|
query = resBody.Queries[i]
|
||||||
} else {
|
} else {
|
||||||
query = req.Queries[i]
|
query = req.Queries[i]
|
||||||
}
|
}
|
||||||
// qdata := res[0]
|
// qdata := res[0]
|
||||||
metric := ccms.toLocalName(query.Metric)
|
metric := query.Metric
|
||||||
scope := assignedScope[i]
|
scope := assignedScope[i]
|
||||||
mc := archive.GetMetricConfig(cluster, metric)
|
mc := archive.GetMetricConfig(cluster, metric)
|
||||||
|
|
||||||
res := mc.Timestep
|
res := mc.Timestep
|
||||||
if len(row) > 0 {
|
if len(row) > 0 {
|
||||||
res = row[0].Resolution
|
res = int(row[0].Resolution)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init Nested Map Data Structures If Not Found
|
// Init Nested Map Data Structures If Not Found
|
||||||
@@ -971,9 +842,9 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
nodes []string,
|
nodes []string,
|
||||||
metrics []string,
|
metrics []string,
|
||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
resolution int,
|
resolution int64,
|
||||||
) ([]ApiQuery, []schema.MetricScope, error) {
|
) ([]memorystore.ApiQuery, []schema.MetricScope, error) {
|
||||||
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
|
queries := make([]memorystore.ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
|
||||||
assignedScope := []schema.MetricScope{}
|
assignedScope := []schema.MetricScope{}
|
||||||
|
|
||||||
// Get Topol before loop if subCluster given
|
// Get Topol before loop if subCluster given
|
||||||
@@ -988,7 +859,7 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, metric := range metrics {
|
for _, metric := range metrics {
|
||||||
remoteName := ccms.toRemoteName(metric)
|
metric := metric
|
||||||
mc := archive.GetMetricConfig(cluster, metric)
|
mc := archive.GetMetricConfig(cluster, metric)
|
||||||
if mc == nil {
|
if mc == nil {
|
||||||
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster)
|
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster)
|
||||||
@@ -1000,7 +871,7 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
if mc.SubClusters != nil {
|
if mc.SubClusters != nil {
|
||||||
isRemoved := false
|
isRemoved := false
|
||||||
for _, scConfig := range mc.SubClusters {
|
for _, scConfig := range mc.SubClusters {
|
||||||
if scConfig.Name == subCluster && scConfig.Remove == true {
|
if scConfig.Name == subCluster && scConfig.Remove {
|
||||||
isRemoved = true
|
isRemoved = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -1056,8 +927,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &acceleratorString,
|
Type: &acceleratorString,
|
||||||
@@ -1074,8 +945,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &acceleratorString,
|
Type: &acceleratorString,
|
||||||
@@ -1088,8 +959,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
|
|
||||||
// HWThread -> HWThead
|
// HWThread -> HWThead
|
||||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &hwthreadString,
|
Type: &hwthreadString,
|
||||||
@@ -1104,8 +975,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {
|
||||||
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||||
for _, core := range cores {
|
for _, core := range cores {
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &hwthreadString,
|
Type: &hwthreadString,
|
||||||
@@ -1121,8 +992,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {
|
||||||
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||||
for _, socket := range sockets {
|
for _, socket := range sockets {
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &hwthreadString,
|
Type: &hwthreadString,
|
||||||
@@ -1136,8 +1007,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
|
|
||||||
// HWThread -> Node
|
// HWThread -> Node
|
||||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &hwthreadString,
|
Type: &hwthreadString,
|
||||||
@@ -1151,8 +1022,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
// Core -> Core
|
// Core -> Core
|
||||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
|
||||||
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &coreString,
|
Type: &coreString,
|
||||||
@@ -1167,8 +1038,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
|
||||||
sockets, _ := topology.GetSocketsFromCores(topology.Node)
|
sockets, _ := topology.GetSocketsFromCores(topology.Node)
|
||||||
for _, socket := range sockets {
|
for _, socket := range sockets {
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &coreString,
|
Type: &coreString,
|
||||||
@@ -1183,8 +1054,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
// Core -> Node
|
// Core -> Node
|
||||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
|
||||||
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &coreString,
|
Type: &coreString,
|
||||||
@@ -1198,8 +1069,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
// MemoryDomain -> MemoryDomain
|
// MemoryDomain -> MemoryDomain
|
||||||
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
|
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
|
||||||
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &memoryDomainString,
|
Type: &memoryDomainString,
|
||||||
@@ -1213,8 +1084,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
// MemoryDoman -> Node
|
// MemoryDoman -> Node
|
||||||
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
|
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
|
||||||
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &memoryDomainString,
|
Type: &memoryDomainString,
|
||||||
@@ -1228,8 +1099,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
// Socket -> Socket
|
// Socket -> Socket
|
||||||
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
|
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
|
||||||
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &socketString,
|
Type: &socketString,
|
||||||
@@ -1243,8 +1114,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
// Socket -> Node
|
// Socket -> Node
|
||||||
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
|
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
|
||||||
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &socketString,
|
Type: &socketString,
|
||||||
@@ -1257,8 +1128,8 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
|
|
||||||
// Node -> Node
|
// Node -> Node
|
||||||
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
|
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
|
||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, memorystore.ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: metric,
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Resolution: resolution,
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
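Across the hunks above, the client drops its HTTP round-trip to an external cc-metric-store (doRequest) and the remote/local metric-name translation (toRemoteName/toLocalName), and queries the in-process memorystore package directly instead. A minimal sketch of the new call shape, assuming the memorystore types exactly as they appear in the diff; the literal values and the helper itself are illustrative:

    package example

    import (
    	"fmt"

    	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
    	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
    )

    // fetchOneMetric queries the embedded metric store: no network request,
    // no metric-name mapping.
    func fetchOneMetric(cluster, host, metric string, from, to int64) {
    	req := memorystore.ApiQueryRequest{
    		Cluster:  cluster,
    		From:     from,
    		To:       to,
    		WithData: true,
    		Queries: []memorystore.ApiQuery{
    			{Metric: metric, Hostname: host, Resolution: 60},
    		},
    	}

    	resBody, err := memorystore.FetchData(req)
    	if err != nil {
    		cclog.Errorf("Error while fetching data : %s", err.Error())
    		return
    	}
    	for i, res := range resBody.Results {
    		// The metric name is used as-is on both sides of the query.
    		fmt.Println(req.Queries[i].Metric, len(res))
    	}
    }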
@@ -74,9 +74,8 @@ func (tmdr *TestMetricDataRepository) LoadNodeListData(
 }

 func DeepCopy(jd_temp schema.JobData) schema.JobData {
-	var jd schema.JobData

-	jd = make(schema.JobData, len(jd_temp))
+	jd := make(schema.JobData, len(jd_temp))
 	for k, v := range jd_temp {
 		jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
 		for k_, v_ := range v {
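The DeepCopy cleanup above folds a declaration plus assignment into one short variable declaration; the copy stays deep across the map levels because a fresh inner map is allocated per key. The same pattern with the schema types simplified to plain maps (a sketch, not the repository code):

    // scopeData stands in for the real map[schema.MetricScope]*schema.JobMetric.
    type scopeData map[string]*int

    func deepCopy(src map[string]scopeData) map[string]scopeData {
    	dst := make(map[string]scopeData, len(src))
    	for k, v := range src {
    		dst[k] = make(scopeData, len(v)) // fresh inner map per key
    		for k2, v2 := range v {
    			dst[k][k2] = v2
    		}
    	}
    	return dst
    }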
@@ -52,18 +52,18 @@ func GetJobRepository() *JobRepository {
 }

 var jobColumns []string = []string{
-	"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
+	"job.id", "job.job_id", "job.hpc_user", "job.project", "job.hpc_cluster", "job.subcluster",
 	"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
-	"job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status",
+	"job.num_hwthreads", "job.num_acc", "job.shared", "job.monitoring_status",
 	"job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources",
 	"job.footprint", "job.energy",
 }

 var jobCacheColumns []string = []string{
-	"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
+	"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.hpc_cluster",
 	"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
 	"job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads",
-	"job_cache.num_acc", "job_cache.exclusive", "job_cache.monitoring_status", "job_cache.smt",
+	"job_cache.num_acc", "job_cache.shared", "job_cache.monitoring_status", "job_cache.smt",
 	"job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources",
 	"job_cache.footprint", "job_cache.energy",
 }

@@ -390,7 +390,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
 	start := time.Now()
 	partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) {
 		parts := []string{}
-		if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
+		if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.hpc_cluster = ?;`, cluster); err != nil {
 			return nil, 0, 1000
 		}

@@ -410,7 +410,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
 	subclusters := make(map[string]map[string]int)
 	rows, err := sq.Select("resources", "subcluster").From("job").
 		Where("job.job_state = 'running'").
-		Where("job.cluster = ?", cluster).
+		Where("job.hpc_cluster = ?", cluster).
 		RunWith(r.stmtCache).Query()
 	if err != nil {
 		cclog.Error("Error while running query")

@@ -505,7 +505,7 @@ func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) {
 // FIXME: Reconsider filtering short jobs with harcoded threshold
 func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
 	query := sq.Select(jobColumns...).From("job").
-		Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
+		Where(fmt.Sprintf("job.hpc_cluster = '%s'", cluster)).
 		Where("job.job_state = 'running'").
 		Where("job.duration > 600")
|||||||
@@ -14,19 +14,19 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const NamedJobCacheInsert string = `INSERT INTO job_cache (
|
const NamedJobCacheInsert string = `INSERT INTO job_cache (
|
||||||
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
||||||
) VALUES (
|
) VALUES (
|
||||||
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
:job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||||
);`
|
);`
|
||||||
|
|
||||||
const NamedJobInsert string = `INSERT INTO job (
|
const NamedJobInsert string = `INSERT INTO job (
|
||||||
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
||||||
) VALUES (
|
) VALUES (
|
||||||
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
:job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||||
);`
|
);`
|
||||||
|
|
||||||
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
||||||
@@ -70,7 +70,7 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
_, err = r.DB.Exec(
|
_, err = r.DB.Exec(
|
||||||
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
|
"INSERT INTO job (job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Warnf("Error while Job sync: %v", err)
|
cclog.Warnf("Error while Job sync: %v", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|||||||
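The named-parameter strings above only work if the Go value handed to the driver exposes matching field tags (db:"hpc_cluster", db:"shared", and so on). A sketch of how such a named insert is typically executed with sqlx; the helper itself is illustrative, not from this PR:

    import "github.com/jmoiron/sqlx"

    // insertNamed runs a named INSERT such as NamedJobInsert; job must be a
    // struct (or map) whose db tags match the :placeholders in the query.
    func insertNamed(db *sqlx.DB, query string, job any) (int64, error) {
    	res, err := db.NamedExec(query, job)
    	if err != nil {
    		return 0, err
    	}
    	return res.LastInsertId()
    }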
@@ -31,7 +31,7 @@ func (r *JobRepository) Find(
 		Where("job.job_id = ?", *jobId)

 	if cluster != nil {
-		q = q.Where("job.cluster = ?", *cluster)
+		q = q.Where("job.hpc_cluster = ?", *cluster)
 	}
 	if startTime != nil {
 		q = q.Where("job.start_time = ?", *startTime)

@@ -52,7 +52,7 @@ func (r *JobRepository) FindCached(
 		Where("job_cache.job_id = ?", *jobId)

 	if cluster != nil {
-		q = q.Where("job_cache.cluster = ?", *cluster)
+		q = q.Where("job_cache.hpc_cluster = ?", *cluster)
 	}
 	if startTime != nil {
 		q = q.Where("job_cache.start_time = ?", *startTime)

@@ -78,7 +78,7 @@ func (r *JobRepository) FindAll(
 		Where("job.job_id = ?", *jobId)

 	if cluster != nil {
-		q = q.Where("job.cluster = ?", *cluster)
+		q = q.Where("job.hpc_cluster = ?", *cluster)
 	}
 	if startTime != nil {
 		q = q.Where("job.start_time = ?", *startTime)

@@ -183,7 +183,7 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
 	q := sq.Select(jobColumns...).
 		From("job").
 		Where("job.job_id = ?", jobId).
-		Where("job.cluster = ?", cluster).
+		Where("job.hpc_cluster = ?", cluster).
 		Where("job.start_time = ?", startTime)

 	q, qerr := SecurityCheck(ctx, q)

@@ -203,7 +203,7 @@ func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cl
 		From("job").
 		Where("job.job_id = ?", jobId).
 		Where("job.hpc_user = ?", user).
-		Where("job.cluster = ?", cluster).
+		Where("job.hpc_cluster = ?", cluster).
 		Where("job.start_time = ?", startTime)

 	_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
@@ -168,7 +168,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
 		query = buildMetaJsonCondition("jobName", filter.JobName, query)
 	}
 	if filter.Cluster != nil {
-		query = buildStringCondition("job.cluster", filter.Cluster, query)
+		query = buildStringCondition("job.hpc_cluster", filter.Cluster, query)
 	}
 	if filter.Partition != nil {
 		query = buildStringCondition("job.cluster_partition", filter.Partition, query)

@@ -183,8 +183,8 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
 		now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
 		query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
 	}
-	if filter.Exclusive != nil {
-		query = query.Where("job.exclusive = ?", *filter.Exclusive)
+	if filter.Shared != nil {
+		query = query.Where("job.shared = ?", *filter.Shared)
 	}
 	if filter.State != nil {
 		states := make([]string, len(filter.State))
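The filter changes type along with the column: Exclusive was an integer in 0..2, Shared is a string whose valid values elsewhere in this PR are "none", "single_user" and "multi_user". A sketch of the resulting condition (values illustrative):

    import (
    	"fmt"

    	sq "github.com/Masterminds/squirrel"
    )

    func sharedFilterExample() {
    	shared := "none" // exclusively-used nodes under the new tri-state enum
    	q := sq.Select("job.id").From("job").Where("job.shared = ?", shared)
    	sqlStr, args, _ := q.ToSql()
    	fmt.Println(sqlStr, args) // SELECT job.id FROM job WHERE job.shared = ? [none]
    }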
@@ -3,7 +3,7 @@ CREATE TABLE "job_cache" (
 	job_id BIGINT NOT NULL,
 	hpc_cluster VARCHAR(255) NOT NULL,
 	subcluster VARCHAR(255) NOT NULL,
-	submit_time BIGINT NOT NULL, -- Unix timestamp
+	submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
 	start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
 	hpc_user VARCHAR(255) NOT NULL,
 	project VARCHAR(255) NOT NULL,

@@ -30,7 +30,7 @@ CREATE TABLE "job_cache" (
 	energy REAL NOT NULL DEFAULT 0.0,
 	energy_footprint TEXT DEFAULT NULL,
 	footprint TEXT DEFAULT NULL,
-	UNIQUE (job_id, cluster, start_time)
+	UNIQUE (job_id, hpc_cluster, start_time)
 );

 CREATE TABLE "job_new" (

@@ -65,10 +65,33 @@ CREATE TABLE "job_new" (
 	energy REAL NOT NULL DEFAULT 0.0,
 	energy_footprint TEXT DEFAULT NULL,
 	footprint TEXT DEFAULT NULL,
-	UNIQUE (job_id, cluster, start_time)
+	UNIQUE (job_id, hpc_cluster, start_time)
 );

 ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster;
-INSERT INTO job_new SELECT * FROM job;
+
+CREATE TABLE IF NOT EXISTS lookup_exclusive (
+	id INTEGER PRIMARY KEY,
+	name TEXT NOT NULL UNIQUE
+);
+
+INSERT INTO lookup_exclusive (id, name) VALUES
+	(0, 'multi_user'),
+	(1, 'none'),
+	(2, 'single_user');
+
+INSERT INTO job_new (
+	id, job_id, hpc_cluster, subcluster, submit_time, start_time, hpc_user, project,
+	cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
+	num_nodes, num_hwthreads, num_acc, smt, shared, monitoring_status, energy,
+	energy_footprint, footprint
+) SELECT
+	id, job_id, hpc_cluster, subcluster, 0, start_time, hpc_user, project,
+	cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
+	num_nodes, num_hwthreads, num_acc, smt, (SELECT name FROM lookup_exclusive WHERE id=job.exclusive), monitoring_status, energy,
+	energy_footprint, footprint
+FROM job;
+
+DROP TABLE lookup_exclusive;
 DROP TABLE job;
 ALTER TABLE job_new RENAME TO job;
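The temporary lookup_exclusive table exists only to translate the old integer column into the new string enum inside a single INSERT ... SELECT, after which it is dropped. The mapping it encodes, written out as a plain Go function (a stand-in for reading the migration, not code from it):

    // exclusiveToShared mirrors the lookup_exclusive rows:
    // 0 -> multi_user, 1 -> none, 2 -> single_user.
    func exclusiveToShared(exclusive int) string {
    	switch exclusive {
    	case 0:
    		return "multi_user"
    	case 2:
    		return "single_user"
    	default: // 1 was the old "exclusive job" value
    		return "none"
    	}
    }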
@@ -21,10 +21,9 @@ import (

 // GraphQL validation should make sure that no unkown values can be specified.
 var groupBy2column = map[model.Aggregate]string{
 	model.AggregateUser:    "job.hpc_user",
 	model.AggregateProject: "job.project",
-	model.AggregateCluster:    "job.cluster",
-	model.AggregateSubcluster: "job.subcluster",
+	model.AggregateCluster: "job.hpc_cluster",
 }

 var sortBy2column = map[model.SortByAggregate]string{
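With AggregateSubcluster dropped from the map, grouping by subcluster is no longer offered here; the remaining entries feed GROUP BY clauses. A hedged sketch of the usual consumption pattern (the surrounding query construction is assumed):

    import sq "github.com/Masterminds/squirrel"

    // groupByCluster uses the groupBy2column map from the diff above;
    // model.AggregateCluster now resolves to "job.hpc_cluster".
    func groupByCluster() sq.SelectBuilder {
    	col := groupBy2column[model.AggregateCluster]
    	return sq.Select(col, "COUNT(*) AS count").From("job").GroupBy(col)
    }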
BIN internal/repository/testdata/job.db vendored
Binary file not shown.
@@ -1 +1,2 @@
 vasp
+VASP
@@ -8,7 +8,7 @@
   ],
   "metrics": ["cpu_load"],
   "requirements": [
-    "job.exclusive == 1",
+    "job.shared == \"none\"",
     "job.duration > job_min_duration_seconds"
   ],
   "variables": [
@@ -4,7 +4,7 @@
   "parameters": ["job_min_duration_seconds"],
   "metrics": ["flops_any", "mem_bw"],
   "requirements": [
-    "job.exclusive == 1",
+    "job.shared == \"none\"",
     "job.duration > job_min_duration_seconds"
   ],
   "variables": [
@@ -8,7 +8,7 @@
   ],
   "metrics": ["cpu_load"],
   "requirements": [
-    "job.exclusive == 1",
+    "job.shared == \"none\"",
     "job.duration > job_min_duration_seconds"
   ],
   "variables": [
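All three rule files now express node exclusivity as a string comparison. Reduced to plain Go, the two requirements check like this (struct and field names assumed to follow the new schema; the real rule engine evaluates these as expression strings):

    type job struct {
    	Shared   string // "none" | "single_user" | "multi_user"
    	Duration int64  // seconds
    }

    // matchesRule is an illustrative stand-in for the rule engine evaluating
    // the requirements above.
    func matchesRule(j job, jobMinDurationSeconds int64) bool {
    	return j.Shared == "none" && j.Duration > jobMinDurationSeconds
    }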
@@ -26,9 +26,9 @@ func RegisterCommitJobService() {
 		gocron.NewTask(
 			func() {
 				start := time.Now()
-				cclog.Printf("Jobcache sync started at %s", start.Format(time.RFC3339))
+				cclog.Printf("Jobcache sync started at %s\n", start.Format(time.RFC3339))
 				jobs, _ := jobRepo.SyncJobs()
 				repository.CallJobStartHooks(jobs)
-				cclog.Printf("Jobcache sync and job callbacks are done and took %s", time.Since(start))
+				cclog.Printf("Jobcache sync and job callbacks are done and took %s\n", time.Since(start))
 			}))
 }
@@ -68,7 +68,7 @@ func Start(cronCfg, archiveConfig json.RawMessage) {
 	dec := json.NewDecoder(bytes.NewReader(cronCfg))
 	dec.DisallowUnknownFields()
 	if err := dec.Decode(&Keys); err != nil {
-		cclog.Errorf("error while decoding ldap config: %v", err)
+		cclog.Errorf("error while decoding cron config: %v", err)
 	}

 	var cfg struct {
@@ -25,8 +25,8 @@ func RegisterUpdateDurationWorker() {
 		gocron.NewTask(
 			func() {
 				start := time.Now()
-				cclog.Printf("Update duration started at %s", start.Format(time.RFC3339))
+				cclog.Printf("Update duration started at %s\n", start.Format(time.RFC3339))
 				jobRepo.UpdateDuration()
-				cclog.Printf("Update duration is done and took %s", time.Since(start))
+				cclog.Printf("Update duration is done and took %s\n", time.Since(start))
 			}))
 }
@@ -34,7 +34,7 @@ func RegisterFootprintWorker() {
 	c := 0
 	ce := 0
 	cl := 0
-	cclog.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
+	cclog.Printf("Update Footprints started at %s\n", s.Format(time.RFC3339))

 	for _, cluster := range archive.Clusters {
 		s_cluster := time.Now()

@@ -134,8 +134,8 @@ func RegisterFootprintWorker() {
 					}
 					jobRepo.TransactionEnd(t)
 				}
-				cclog.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
+				cclog.Debugf("Finish Cluster %s, took %s\n", cluster.Name, time.Since(s_cluster))
 			}
-			cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
+			cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s\n", c, cl, ce, time.Since(s))
 		}))
 }
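These workers share one registration shape: a gocron task wrapping a timed function that logs start and elapsed time (now with explicit trailing newlines). A condensed sketch with the go-co-op/gocron v2 API; the interval and helper are assumptions, not taken from the diff:

    import (
    	"time"

    	"github.com/go-co-op/gocron/v2"
    	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
    )

    // registerWorker wires a periodic task into an existing scheduler.
    func registerWorker(s gocron.Scheduler, name string, work func()) error {
    	_, err := s.NewJob(
    		gocron.DurationJob(5*time.Minute), // illustrative interval
    		gocron.NewTask(func() {
    			start := time.Now()
    			cclog.Printf("%s started at %s\n", name, start.Format(time.RFC3339))
    			work()
    			cclog.Printf("%s done, took %s\n", name, time.Since(start))
    		}))
    	return err
    }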
@@ -8,6 +8,8 @@ import (
 	"errors"
 	"fmt"

+	"github.com/ClusterCockpit/cc-backend/internal/config"
+	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
 	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
 	"github.com/ClusterCockpit/cc-lib/schema"
 )

@@ -31,6 +33,8 @@ func initClusterConfig() error {
 			return err
 		}

+		memorystore.Clusters = append(memorystore.Clusters, cluster.Name)
+
 		if len(cluster.Name) == 0 ||
 			len(cluster.MetricConfig) == 0 ||
 			len(cluster.SubClusters) == 0 {

@@ -122,6 +126,16 @@ func initClusterConfig() error {
 			}
 			ml.Availability = append(metricLookup[mc.Name].Availability, availability)
 			metricLookup[mc.Name] = ml
+
+			agg, err := config.AssignAggregationStratergy(mc.Aggregation)
+			if err != nil {
+				return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
+			}
+
+			config.AddMetric(mc.Name, config.MetricConfig{
+				Frequency:   int64(mc.Timestep),
+				Aggregation: agg,
+			})
 		}

 		Clusters = append(Clusters, cluster)
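initClusterConfig now registers every archive metric with the memory store's config, mapping the cluster.json timestep to a frequency and the aggregation string to a strategy via config.AssignAggregationStratergy. That function's internals are not shown in this diff; a guess at the kind of mapping such a function performs, with type and constant names invented for illustration:

    import "fmt"

    type aggregationStrategy int

    const (
    	noAggregation aggregationStrategy = iota
    	sumAggregation
    	avgAggregation
    )

    // assignAggregationStrategy is a hypothetical stand-in for the real
    // config.AssignAggregationStratergy.
    func assignAggregationStrategy(s string) (aggregationStrategy, error) {
    	switch s {
    	case "sum":
    		return sumAggregation, nil
    	case "avg":
    		return avgAggregation, nil
    	case "":
    		return noAggregation, nil
    	default:
    		return noAggregation, fmt.Errorf("unknown aggregation strategy %q", s)
    	}
    }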
@@ -1,194 +1,194 @@
 {
-  "exclusive": 1,
+  "shared": "none",
   "jobId": 1403244,
   "statistics": {
     "mem_bw": { "avg": 63.57, "min": 0, "unit": { "base": "B/s", "prefix": "G" }, "max": 74.5 },
     "rapl_power": { "avg": 228.07, "min": 0, "unit": { "base": "W" }, "max": 258.56 },
     "ipc": { "unit": { "base": "IPC" }, "max": 0.510204081632653, "avg": 1.53846153846154, "min": 0.0 },
     "clock": { "min": 1380.32, "avg": 2599.39, "unit": { "base": "Hz", "prefix": "M" }, "max": 2634.46 },
     "cpu_load": { "avg": 18.4, "min": 0, "max": 23.58, "unit": { "base": "load" } },
     "flops_any": { "max": 404.62, "unit": { "base": "F/s", "prefix": "G" }, "avg": 225.59, "min": 0 },
     "flops_dp": { "max": 0.24, "unit": { "base": "F/s", "prefix": "G" }, "min": 0, "avg": 0 },
     "mem_used": { "min": 1.55, "avg": 27.84, "unit": { "base": "B", "prefix": "G" }, "max": 37.5 },
     "flops_sp": { "min": 0, "avg": 225.59, "max": 404.62, "unit": { "base": "F/s", "prefix": "G" } }
   },
   "resources": [
     { "hostname": "e0102" }, { "hostname": "e0103" }, { "hostname": "e0105" }, { "hostname": "e0106" },
     { "hostname": "e0107" }, { "hostname": "e0108" }, { "hostname": "e0114" }, { "hostname": "e0320" },
     { "hostname": "e0321" }, { "hostname": "e0325" }, { "hostname": "e0404" }, { "hostname": "e0415" },
     { "hostname": "e0433" }, { "hostname": "e0437" }, { "hostname": "e0439" }, { "hostname": "e0501" },
     { "hostname": "e0503" }, { "hostname": "e0505" }, { "hostname": "e0506" }, { "hostname": "e0512" },
     { "hostname": "e0513" }, { "hostname": "e0514" }, { "hostname": "e0653" }, { "hostname": "e0701" },
     { "hostname": "e0716" }, { "hostname": "e0727" }, { "hostname": "e0728" }, { "hostname": "e0925" },
     { "hostname": "e0926" }, { "hostname": "e0929" }, { "hostname": "e0934" }, { "hostname": "e0951" }
   ],
   "walltime": 10,
   "jobState": "completed",
   "cluster": "emmy",
   "subCluster": "haswell",
   "stopTime": 1609009562,
   "user": "emmyUser6",
   "startTime": 1608923076,
   "partition": "work",
   "tags": [],
   "project": "no project",
   "numNodes": 32,
   "duration": 86486
 }
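The archived-job fixtures swap the integer for the tri-state string in place; nothing else in the payload changes. The field as a Go struct member, illustrative only (the real definition lives in cc-lib's schema package):

    // Shared replaces the old 0/1/2 integer "exclusive" field.
    type jobMeta struct {
    	Shared string `json:"shared"` // "none" | "single_user" | "multi_user"
    }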
@@ -1,194 +1,194 @@
{
{
"stopTime": 1609387081,
"stopTime": 1609387081,
"resources": [
"resources": [
{
{
"hostname": "e0151"
"hostname": "e0151"
},
{
"hostname": "e0152"
},
{
"hostname": "e0153"
},
{
"hostname": "e0232"
},
{
"hostname": "e0303"
},
{
"hostname": "e0314"
},
{
"hostname": "e0344"
},
{
"hostname": "e0345"
},
{
"hostname": "e0348"
},
{
"hostname": "e0507"
},
{
"hostname": "e0518"
},
{
"hostname": "e0520"
},
{
"hostname": "e0522"
},
{
"hostname": "e0526"
},
{
"hostname": "e0527"
},
{
"hostname": "e0528"
},
{
"hostname": "e0530"
},
{
"hostname": "e0551"
},
{
"hostname": "e0604"
},
{
"hostname": "e0613"
},
{
"hostname": "e0634"
},
{
"hostname": "e0639"
},
{
"hostname": "e0640"
},
{
"hostname": "e0651"
},
{
"hostname": "e0653"
},
{
"hostname": "e0701"
},
{
"hostname": "e0704"
},
{
"hostname": "e0751"
},
{
"hostname": "e0809"
},
{
"hostname": "e0814"
},
{
"hostname": "e0819"
},
{
"hostname": "e0908"
}
],
"walltime": 10,
"cluster": "emmy",
"subCluster": "haswell",
"jobState": "completed",
"statistics": {
"clock": {
"max": 2634.9,
"unit": {
"base": "Hz",
"prefix": "M"
},
"min": 0,
"avg": 2597.8
},
"cpu_load": {
"max": 27.41,
"min": 0,
"avg": 18.39,
"unit": {
"base": "load"
}
},
"mem_bw": {
"min": 0,
"avg": 63.23,
"unit": {
"base": "B/s",
"prefix": "G"
},
"max": 75.06
},
"ipc": {
"min": 0.0,
"avg": 1.53846153846154,
"unit": {
"base": "IPC"
},
"max": 0.490196078431373
},
"rapl_power": {
"min": 0,
"avg": 227.32,
"unit": {
"base": "W"
},
"max": 256.22
},
"mem_used": {
"min": 1.5,
"avg": 27.77,
"unit": {
"base": "B",
"prefix": "G"
},
"max": 37.43
},
"flops_sp": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"max": 413.21,
"min": 0,
"avg": 224.41
},
"flops_dp": {
"max": 5.72,
"unit": {
"base": "F/s",
"prefix": "G"
},
"min": 0,
"avg": 0
},
"flops_any": {
"min": 0,
"avg": 224.42,
"max": 413.21,
"unit": {
"base": "F/s",
"prefix": "G"
}
}
},
},
"exclusive": 1,
{
"jobId": 1404397,
"hostname": "e0152"
"tags": [],
},
"partition": "work",
{
"project": "no project",
"hostname": "e0153"
"user": "emmyUser6",
},
"startTime": 1609300556,
{
"duration": 86525,
"hostname": "e0232"
"numNodes": 32
},
{
"hostname": "e0303"
},
{
"hostname": "e0314"
},
{
"hostname": "e0344"
},
{
"hostname": "e0345"
},
{
"hostname": "e0348"
},
{
"hostname": "e0507"
},
{
"hostname": "e0518"
},
{
"hostname": "e0520"
},
{
"hostname": "e0522"
},
{
"hostname": "e0526"
},
{
"hostname": "e0527"
},
{
"hostname": "e0528"
},
{
"hostname": "e0530"
},
{
"hostname": "e0551"
},
{
"hostname": "e0604"
},
{
"hostname": "e0613"
},
{
"hostname": "e0634"
},
{
"hostname": "e0639"
},
{
"hostname": "e0640"
},
{
"hostname": "e0651"
},
{
"hostname": "e0653"
},
{
"hostname": "e0701"
},
{
"hostname": "e0704"
},
{
"hostname": "e0751"
},
{
"hostname": "e0809"
},
{
"hostname": "e0814"
},
{
"hostname": "e0819"
},
{
"hostname": "e0908"
}
],
"walltime": 10,
"cluster": "emmy",
"subCluster": "haswell",
"jobState": "completed",
"statistics": {
"clock": {
"max": 2634.9,
"unit": {
"base": "Hz",
"prefix": "M"
},
"min": 0,
"avg": 2597.8
},
"cpu_load": {
"max": 27.41,
"min": 0,
"avg": 18.39,
"unit": {
"base": "load"
}
},
"mem_bw": {
"min": 0,
"avg": 63.23,
"unit": {
"base": "B/s",
"prefix": "G"
},
"max": 75.06
},
"ipc": {
"min": 0.0,
"avg": 1.53846153846154,
"unit": {
"base": "IPC"
},
"max": 0.490196078431373
},
"rapl_power": {
"min": 0,
"avg": 227.32,
"unit": {
"base": "W"
},
"max": 256.22
},
"mem_used": {
"min": 1.5,
"avg": 27.77,
"unit": {
"base": "B",
"prefix": "G"
},
"max": 37.43
},
"flops_sp": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"max": 413.21,
"min": 0,
"avg": 224.41
},
"flops_dp": {
"max": 5.72,
"unit": {
"base": "F/s",
"prefix": "G"
},
"min": 0,
"avg": 0
},
"flops_any": {
"min": 0,
"avg": 224.42,
"max": 413.21,
"unit": {
"base": "F/s",
"prefix": "G"
}
}
},
"shared": "none",
"jobId": 1404397,
"tags": [],
"partition": "work",
"project": "no project",
"user": "emmyUser6",
"startTime": 1609300556,
"duration": 86525,
"numNodes": 32
}
}
@@ -13,5 +13,7 @@ else
cp ./configs/config-demo.json config.json
cp ./configs/config-demo.json config.json

./cc-backend -migrate-db
./cc-backend -migrate-db
./cc-backend -server -dev -init-db -add-user demo:admin:demo
./cc-backend -dev -init-db -add-user demo:admin,api:demo
./cc-backend -server -dev

fi
fi
@@ -56,7 +56,7 @@
job(id: "${dbid}") {
job(id: "${dbid}") {
id, jobId, user, project, cluster, startTime,
id, jobId, user, project, cluster, startTime,
duration, numNodes, numHWThreads, numAcc, energy,
duration, numNodes, numHWThreads, numAcc, energy,
SMT, exclusive, partition, subCluster, arrayJobId,
SMT, shared, partition, subCluster, arrayJobId,
monitoringStatus, state, walltime,
monitoringStatus, state, walltime,
tags { id, type, scope, name },
tags { id, type, scope, name },
resources { hostname, hwthreads, accelerators },
resources { hostname, hwthreads, accelerators },
@@ -325,7 +325,7 @@
metricUnit={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.unit}
metricUnit={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.unit}
nativeScope={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.scope}
nativeScope={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.scope}
presetScopes={item.data.map((x) => x.scope)}
presetScopes={item.data.map((x) => x.scope)}
isShared={$initq.data.job.exclusive != 1}
isShared={$initq.data.job.shared != "none"}
/>
/>
{:else if item.disabled == true}
{:else if item.disabled == true}
<Card color="info">
<Card color="info">
@@ -69,7 +69,7 @@
hostname
hostname
}
}
SMT
SMT
exclusive
shared
partition
partition
arrayJobId
arrayJobId
monitoringStatus
monitoringStatus
@@ -172,7 +172,7 @@
{job.numNodes}
{job.numNodes}
{/if}
{/if}
<Icon name="pc-horizontal" />
<Icon name="pc-horizontal" />
{#if job.exclusive != 1}
{#if job.shared != "none"}
(shared)
(shared)
{/if}
{/if}
{#if job.numAcc > 0}
{#if job.numAcc > 0}
@@ -213,7 +213,7 @@
metric={metric.data.name}
metric={metric.data.name}
cluster={cluster.find((c) => c.name == job.cluster)}
cluster={cluster.find((c) => c.name == job.cluster)}
subCluster={job.subCluster}
subCluster={job.subCluster}
isShared={job.exclusive != 1}
isShared={job.shared != "none"}
numhwthreads={job.numHWThreads}
numhwthreads={job.numHWThreads}
numaccs={job.numAcc}
numaccs={job.numAcc}
zoomState={zoomStates[metric.data.name] || null}
zoomState={zoomStates[metric.data.name] || null}
@@ -92,7 +92,7 @@
Missing Metric
Missing Metric
</Button>
</Button>
</InputGroup>
</InputGroup>
{:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].exclusive}
{:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].shared == "none"}
<InputGroup>
<InputGroup>
<InputGroupText>
<InputGroupText>
<Icon name="circle-fill"/>
<Icon name="circle-fill"/>
@@ -104,7 +104,7 @@
Exclusive
Exclusive
</Button>
</Button>
</InputGroup>
</InputGroup>
{:else if nodeJobsData.jobs.count >= 1 && !nodeJobsData.jobs.items[0].exclusive}
{:else if nodeJobsData.jobs.count >= 1 && !(nodeJobsData.jobs.items[0].shared == "none")}
<InputGroup>
<InputGroup>
<InputGroupText>
<InputGroupText>
<Icon name="circle-half"/>
<Icon name="circle-half"/>
@@ -45,7 +45,7 @@
jobId
jobId
user
user
project
project
exclusive
shared
resources {
resources {
hostname
hostname
accelerators
accelerators
@@ -101,7 +101,7 @@
function buildExtendedLegend() {
function buildExtendedLegend() {
let pendingExtendedLegendData = null
let pendingExtendedLegendData = null
// Build Extended for allocated nodes [Commented: Only Build extended Legend For Shared Nodes]
// Build Extended for allocated nodes [Commented: Only Build extended Legend For Shared Nodes]
if ($nodeJobsData.data.jobs.count >= 1) { // "&& !$nodeJobsData.data.jobs.items[0].exclusive)"
if ($nodeJobsData.data.jobs.count >= 1) {
const accSet = Array.from(new Set($nodeJobsData.data.jobs.items
const accSet = Array.from(new Set($nodeJobsData.data.jobs.items
.map((i) => i.resources
.map((i) => i.resources
.filter((r) => (r.hostname === nodeData.host) && r?.accelerators)
.filter((r) => (r.hostname === nodeData.host) && r?.accelerators)