mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2024-12-25 04:49:05 +01:00
Merge branch 'master' into 40_45_82_update_roles
This commit is contained in:
commit
e0e51813ad
2
Makefile
2
Makefile
@ -30,6 +30,7 @@ SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
||||
$(TARGET): $(VAR) $(SVELTE_TARGETS)
|
||||
$(info ===> BUILD cc-backend)
|
||||
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
|
||||
./cc-backend --migrate-db
|
||||
|
||||
clean:
|
||||
$(info ===> CLEAN)
|
||||
@ -48,5 +49,4 @@ $(SVELTE_TARGETS): $(SVELTE_SRC)
|
||||
|
||||
$(VAR):
|
||||
@mkdir $(VAR)
|
||||
@touch ./var/job.db
|
||||
cd web/frontend && yarn install
|
||||
|
@ -98,6 +98,15 @@ A config file in the JSON format has to be provided using `--config` to override
|
||||
By default, if there is a `config.json` file in the current directory of the `cc-backend` process, it will be loaded even without the `--config` flag.
|
||||
You find documentation of all supported configuration and command line options [here](./configs.README.md).
|
||||
|
||||
## Database initialization and migration
|
||||
|
||||
Every cc-backend version supports a specific database version.
|
||||
On startup the version of the sqlite database is validated and cc-backend will terminate if the version does not match.
|
||||
cc-backend supports to migrate the database schema up to the required version using the `--migrate-db` command line option.
|
||||
In case the database file does not yet exist it is created and initialized by the `--migrate-db` command line option.
|
||||
In case you want to use a newer database version with an olden version of cc-backend you can downgrade a database using the external [migrate](https://github.com/golang-migrate/migrate) tool.
|
||||
In this case you have to provide the path to the migration files in a recent source tree: `./internal/repository/migrations/`.
|
||||
|
||||
## Development
|
||||
In case the REST or GraphQL API is changed the according code generators have to be used.
|
||||
|
||||
|
@ -10,6 +10,7 @@ type Job {
|
||||
jobId: Int!
|
||||
user: String!
|
||||
project: String!
|
||||
jobName: String
|
||||
cluster: String!
|
||||
subCluster: String!
|
||||
startTime: Time!
|
||||
@ -197,14 +198,15 @@ type IntRangeOutput { from: Int!, to: Int! }
|
||||
type TimeRangeOutput { from: Time!, to: Time! }
|
||||
|
||||
input JobFilter {
|
||||
tags: [ID!]
|
||||
jobId: StringInput
|
||||
arrayJobId: Int
|
||||
user: StringInput
|
||||
project: StringInput
|
||||
cluster: StringInput
|
||||
partition: StringInput
|
||||
duration: IntRange
|
||||
tags: [ID!]
|
||||
jobId: StringInput
|
||||
arrayJobId: Int
|
||||
user: StringInput
|
||||
project: StringInput
|
||||
jobName: StringInput
|
||||
cluster: StringInput
|
||||
partition: StringInput
|
||||
duration: IntRange
|
||||
|
||||
minRunningFor: Int
|
||||
|
||||
@ -235,6 +237,7 @@ input StringInput {
|
||||
contains: String
|
||||
startsWith: String
|
||||
endsWith: String
|
||||
in: [String!]
|
||||
}
|
||||
|
||||
input IntRange { from: Int!, to: Int! }
|
||||
@ -255,6 +258,7 @@ type HistoPoint {
|
||||
|
||||
type JobsStatistics {
|
||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
||||
name: String # if User-Statistics: Given Name of Account (ID) Owner
|
||||
totalJobs: Int! # Number of jobs that matched
|
||||
shortJobs: Int! # Number of jobs with a duration of less than 2 minutes
|
||||
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||
|
@ -13,7 +13,6 @@ import (
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
@ -62,19 +61,22 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
var flagReinitDB, flagServer, flagSyncLDAP, flagGops, flagDev, flagVersion bool
|
||||
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob string
|
||||
var flagReinitDB, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagDev, flagVersion, flagLogDateTime bool
|
||||
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
||||
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
|
||||
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
||||
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
||||
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
||||
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,api,user]:<password>`")
|
||||
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
|
||||
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
||||
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
||||
flag.StringVar(&flagLogLevel, "loglevel", "debug", "Sets the logging level: `[debug (default),info,warn,err,fatal,crit]`")
|
||||
flag.Parse()
|
||||
|
||||
if flagVersion {
|
||||
@ -85,6 +87,9 @@ func main() {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// Apply config flags for pkg/log
|
||||
log.Init(flagLogLevel, flagLogDateTime)
|
||||
|
||||
// See https://github.com/google/gops (Runtime overhead is almost zero)
|
||||
if flagGops {
|
||||
if err := agent.Listen(agent.Options{}); err != nil {
|
||||
@ -108,6 +113,11 @@ func main() {
|
||||
config.Keys.DB = os.Getenv(envvar)
|
||||
}
|
||||
|
||||
if flagMigrateDB {
|
||||
repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
|
||||
db := repository.GetConnection()
|
||||
|
||||
@ -118,7 +128,7 @@ func main() {
|
||||
"ldap": config.Keys.LdapConfig,
|
||||
"jwt": config.Keys.JwtConfig,
|
||||
}); err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("auth initialization failed: %v", err)
|
||||
}
|
||||
|
||||
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
|
||||
@ -135,12 +145,12 @@ func main() {
|
||||
if err := authentication.AddUser(&auth.User{
|
||||
Username: parts[0], Projects: emptyPrj, Password: parts[2], Roles: strings.Split(parts[1], ","),
|
||||
}); err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("adding '%s' user authentication failed: %v", parts[0], err)
|
||||
}
|
||||
}
|
||||
if flagDelUser != "" {
|
||||
if err := authentication.DelUser(flagDelUser); err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("deleting user failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@ -150,7 +160,7 @@ func main() {
|
||||
}
|
||||
|
||||
if err := authentication.LdapAuth.Sync(); err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("LDAP sync failed: %v", err)
|
||||
}
|
||||
log.Info("LDAP sync successfull")
|
||||
}
|
||||
@ -158,41 +168,41 @@ func main() {
|
||||
if flagGenJWT != "" {
|
||||
user, err := authentication.GetUser(flagGenJWT)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("could not get user from JWT: %v", err)
|
||||
}
|
||||
|
||||
if !user.HasRole(auth.RoleApi) {
|
||||
log.Warn("that user does not have the API role")
|
||||
log.Warnf("user '%s' does not have the API role", user.Username)
|
||||
}
|
||||
|
||||
jwt, err := authentication.JwtAuth.ProvideJWT(user)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
|
||||
}
|
||||
|
||||
fmt.Printf("JWT for '%s': %s\n", user.Username, jwt)
|
||||
fmt.Printf("MAIN > JWT for '%s': %s\n", user.Username, jwt)
|
||||
}
|
||||
} else if flagNewUser != "" || flagDelUser != "" {
|
||||
log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled")
|
||||
}
|
||||
|
||||
if err := archive.Init(config.Keys.Archive, config.Keys.DisableArchive); err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("failed to initialize archive: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
|
||||
}
|
||||
|
||||
if flagReinitDB {
|
||||
if err := repository.InitDB(); err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("failed to re-initialize repository DB: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if flagImportJob != "" {
|
||||
if err := repository.HandleImportFlag(flagImportJob); err != nil {
|
||||
log.Fatalf("import failed: %s", err.Error())
|
||||
log.Fatalf("job import failed: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
@ -210,12 +220,12 @@ func main() {
|
||||
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
||||
switch e := err.(type) {
|
||||
case string:
|
||||
return fmt.Errorf("panic: %s", e)
|
||||
return fmt.Errorf("MAIN > Panic: %s", e)
|
||||
case error:
|
||||
return fmt.Errorf("panic caused by: %w", e)
|
||||
return fmt.Errorf("MAIN > Panic caused by: %w", e)
|
||||
}
|
||||
|
||||
return errors.New("internal server error (panic)")
|
||||
return errors.New("MAIN > Internal server error (panic)")
|
||||
})
|
||||
}
|
||||
|
||||
@ -296,28 +306,9 @@ func main() {
|
||||
}
|
||||
secured.Handle("/query", graphQLEndpoint)
|
||||
|
||||
// Send a searchId and then reply with a redirect to a user or job.
|
||||
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
|
||||
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
||||
if search := r.URL.Query().Get("searchId"); search != "" {
|
||||
job, username, err := api.JobRepository.FindJobOrUser(r.Context(), search)
|
||||
if err == repository.ErrNotFound {
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(search), http.StatusTemporaryRedirect)
|
||||
return
|
||||
} else if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if username != "" {
|
||||
http.Redirect(rw, r, "/monitoring/user/"+username, http.StatusTemporaryRedirect)
|
||||
return
|
||||
} else {
|
||||
http.Redirect(rw, r, fmt.Sprintf("/monitoring/job/%d", job), http.StatusTemporaryRedirect)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
http.Error(rw, "'searchId' query parameter missing", http.StatusBadRequest)
|
||||
}
|
||||
routerConfig.HandleSearchBar(rw, r, api)
|
||||
})
|
||||
|
||||
// Mount all /monitoring/... and /api/... routes.
|
||||
@ -362,7 +353,7 @@ func main() {
|
||||
// Start http or https server
|
||||
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("starting http listener failed: %v", err)
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
|
||||
@ -374,7 +365,7 @@ func main() {
|
||||
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("loading X509 keypair failed: %v", err)
|
||||
}
|
||||
listener = tls.NewListener(listener, &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
@ -392,16 +383,16 @@ func main() {
|
||||
|
||||
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||
// be established first, then the user can be changed, and after that,
|
||||
// the actuall http server can be started.
|
||||
// the actual http server can be started.
|
||||
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||
log.Fatalf("error while changing user: %s", err.Error())
|
||||
log.Fatalf("error while preparing server start: %s", err.Error())
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
if err := server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("starting server failed: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
@ -411,7 +402,7 @@ func main() {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
<-sigs
|
||||
runtimeEnv.SystemdNotifiy(false, "shutting down")
|
||||
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
|
||||
|
||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||
server.Shutdown(context.Background())
|
||||
@ -425,7 +416,7 @@ func main() {
|
||||
for range time.Tick(30 * time.Minute) {
|
||||
err := jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
|
||||
if err != nil {
|
||||
log.Errorf("error while looking for jobs exceeding theire walltime: %s", err.Error())
|
||||
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
|
||||
}
|
||||
runtime.GC()
|
||||
}
|
||||
|
@ -40,15 +40,15 @@ Start by creating a base folder for all of the following steps.
|
||||
- `cd ../..`
|
||||
* Build Go Executable
|
||||
- `go build ./cmd/cc-backend/`
|
||||
* Prepare Datafolder and Database file
|
||||
- `mkdir var`
|
||||
- `touch var/job.db`
|
||||
* Activate & Config environment for cc-backend
|
||||
- `cp configs/env-template.txt .env`
|
||||
- Optional: Have a look via `vim ./.env`
|
||||
- Copy the `config.json` file included in this tarball into the root directory of cc-backend: `cp ../../config.json ./`
|
||||
* Back to toplevel `clustercockpit`
|
||||
- `cd ..`
|
||||
* Prepare Datafolder and Database file
|
||||
- `mkdir var`
|
||||
- `./cc-backend --migrate-db`
|
||||
|
||||
### Setup cc-metric-store
|
||||
* Clone Repository
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"addr": "0.0.0.0:8080",
|
||||
"addr": "127.0.0.1:8080",
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
|
42
docs/searchbar.md
Normal file
42
docs/searchbar.md
Normal file
@ -0,0 +1,42 @@
|
||||
## Docs for ClusterCockpit Searchbar
|
||||
|
||||
### Usage
|
||||
|
||||
* Searchtags are implemented as `type:<query>` search-string
|
||||
* Types `jobId, jobName, projectId, username, name` for roles `admin` and `support`
|
||||
* `jobName` is jobName as persisted in `job.meta_data` table-column
|
||||
* `username` is actual account identifier as persisted in `job.user` table-column
|
||||
* `name` is account owners name as persisted in `user.name` table-column
|
||||
* Types `jobId, jobName` for role `user`
|
||||
* Examples:
|
||||
* `jobName:myJob12`
|
||||
* `jobId:123456`
|
||||
* `username:abcd100`
|
||||
* `name:Paul`
|
||||
* If no searchTag used: Best guess search with the following hierarchy
|
||||
* `jobId -> username -> name -> projectId -> jobName`
|
||||
* Destinations:
|
||||
* JobId: Always Job-Table (Allows multiple identical matches, e.g. JobIds from different clusters)
|
||||
* JobName: Always Job-Table (Allows multiple identical matches, e.g. JobNames from different clusters)
|
||||
* ProjectId: Always Job-Table
|
||||
* Username
|
||||
* If *one* match found: Opens detailed user-view (`/monitoring/user/$USER`)
|
||||
* If *multiple* matches found: Opens user-table with matches listed (`/monitoring/users/`)
|
||||
* **Please Note**: Only users with jobs will be shown in table! I.e., "multiple matches" can still be only one entry in table.
|
||||
* Name
|
||||
* If *one* matching username found: Opens detailed user-view (`/monitoring/user/$USER`)
|
||||
* If *multiple* usernames found: Opens user-table with matches listed (`/monitoring/users/`)
|
||||
* **Please Note**: Only users with jobs will be shown in table! I.e., "multiple matches" can still be only one entry in table.
|
||||
* Best guess search always redirects to Job-Table or `/monitoring/user/$USER` (first username match)
|
||||
* Simple HTML Error if ...
|
||||
* Best guess search fails -> 'Not Found'
|
||||
* Query `type` is unknown
|
||||
* More than two colons in string -> 'malformed'
|
||||
* Spaces trimmed (both for searchTag and queryString)
|
||||
* ` job12` == `job12`
|
||||
* `projectID : abcd ` == `projectId:abcd`
|
||||
* `jobName`- and `name-`queries work with a part of the target-string
|
||||
* `jobName:myjob` for jobName "myjob_cluster1"
|
||||
* `name:Paul` for name "Paul Atreides"
|
||||
|
||||
* JobName GQL Query is resolved as matching the query as a part of the whole metaData-JSON in the SQL DB.
|
12
go.mod
12
go.mod
@ -1,6 +1,6 @@
|
||||
module github.com/ClusterCockpit/cc-backend
|
||||
|
||||
go 1.17
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/99designs/gqlgen v0.17.16
|
||||
@ -39,10 +39,13 @@ require (
|
||||
github.com/go-openapi/jsonreference v0.20.0 // indirect
|
||||
github.com/go-openapi/spec v0.20.7 // indirect
|
||||
github.com/go-openapi/swag v0.22.3 // indirect
|
||||
github.com/golang-migrate/migrate/v4 v4.15.2 // indirect
|
||||
github.com/golang/protobuf v1.5.2 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/gorilla/securecookie v1.1.1 // indirect
|
||||
github.com/gorilla/websocket v1.5.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/golang-lru v0.5.4 // indirect
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
@ -51,7 +54,7 @@ require (
|
||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
@ -59,17 +62,20 @@ require (
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/prometheus/client_model v0.3.0 // indirect
|
||||
github.com/prometheus/procfs v0.8.0 // indirect
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/swaggo/files v0.0.0-20220728132757-551d4a08d97a // indirect
|
||||
github.com/urfave/cli/v2 v2.8.1 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
go.uber.org/atomic v1.7.0 // indirect
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
|
||||
golang.org/x/net v0.0.0-20220909164309-bea034e7d591 // indirect
|
||||
golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b // indirect
|
||||
golang.org/x/sys v0.0.0-20220913175220-63ea55921009 // indirect
|
||||
golang.org/x/text v0.3.7 // indirect
|
||||
golang.org/x/tools v0.1.12 // indirect
|
||||
google.golang.org/appengine v1.6.6 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
google.golang.org/protobuf v1.28.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
|
@ -135,7 +135,7 @@ type ApiTag struct {
|
||||
type TagJobApiRequest []*ApiTag
|
||||
|
||||
func handleError(err error, statusCode int, rw http.ResponseWriter) {
|
||||
log.Warnf("REST API: %s", err.Error())
|
||||
log.Warnf("REST ERROR : %s", err.Error())
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(statusCode)
|
||||
json.NewEncoder(rw).Encode(ErrorResponse{
|
||||
@ -170,7 +170,7 @@ func decode(r io.Reader, val interface{}) error {
|
||||
// @router /jobs/ [get]
|
||||
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@ -301,7 +301,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
// @router /jobs/tag_job/{id} [post]
|
||||
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@ -366,7 +366,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
// @router /jobs/start_job/ [post]
|
||||
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@ -447,7 +447,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
// @router /jobs/stop_job/{id} [post]
|
||||
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@ -500,7 +500,7 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// @router /jobs/stop_job/ [post]
|
||||
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@ -546,7 +546,7 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
// @router /jobs/delete_job/{id} [delete]
|
||||
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@ -594,7 +594,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// @router /jobs/delete_job/ [delete]
|
||||
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@ -650,7 +650,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
||||
// @router /jobs/delete_job_before/{ts} [delete]
|
||||
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@ -725,7 +725,7 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
||||
|
||||
// func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
|
||||
// if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
// handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
// handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
// return
|
||||
// }
|
||||
|
||||
@ -794,7 +794,7 @@ func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
|
||||
me := auth.GetUser(r.Context())
|
||||
if !me.HasRole(auth.RoleAdmin) {
|
||||
if username != me.Username {
|
||||
http.Error(rw, "only admins are allowed to sign JWTs not for themselves", http.StatusForbidden)
|
||||
http.Error(rw, "Only admins are allowed to sign JWTs not for themselves", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
}
|
||||
@ -819,13 +819,13 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Set("Content-Type", "text/plain")
|
||||
me := auth.GetUser(r.Context())
|
||||
if !me.HasRole(auth.RoleAdmin) {
|
||||
http.Error(rw, "only admins are allowed to create new users", http.StatusForbidden)
|
||||
http.Error(rw, "Only admins are allowed to create new users", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
username, password, role, name, email, project := r.FormValue("username"), r.FormValue("password"), r.FormValue("role"), r.FormValue("name"), r.FormValue("email"), r.FormValue("project")
|
||||
if len(password) == 0 && role != auth.RoleApi {
|
||||
http.Error(rw, "only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
|
||||
http.Error(rw, "Only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
@ -848,12 +848,12 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
rw.Write([]byte(fmt.Sprintf("User %#v successfully created!\n", username)))
|
||||
rw.Write([]byte(fmt.Sprintf("User %v successfully created!\n", username)))
|
||||
}
|
||||
|
||||
func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
|
||||
http.Error(rw, "only admins are allowed to delete a user", http.StatusForbidden)
|
||||
http.Error(rw, "Only admins are allowed to delete a user", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
@ -868,7 +868,7 @@ func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
|
||||
http.Error(rw, "only admins are allowed to fetch a list of users", http.StatusForbidden)
|
||||
http.Error(rw, "Only admins are allowed to fetch a list of users", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
@ -899,7 +899,7 @@ func (api *RestApi) getRoles(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
|
||||
http.Error(rw, "only admins are allowed to update a user", http.StatusForbidden)
|
||||
http.Error(rw, "Only admins are allowed to update a user", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
@ -943,7 +943,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
|
||||
rw.Header().Set("Content-Type", "text/plain")
|
||||
key, value := r.FormValue("key"), r.FormValue("value")
|
||||
|
||||
fmt.Printf("KEY: %#v\nVALUE: %#v\n", key, value)
|
||||
fmt.Printf("REST > KEY: %#v\nVALUE: %#v\n", key, value)
|
||||
|
||||
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, auth.GetUser(r.Context())); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
@ -955,7 +955,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
|
||||
|
||||
func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||
if api.MachineStateDir == "" {
|
||||
http.Error(rw, "not enabled", http.StatusNotFound)
|
||||
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
@ -986,7 +986,7 @@ func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
func (api *RestApi) getMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||
if api.MachineStateDir == "" {
|
||||
http.Error(rw, "not enabled", http.StatusNotFound)
|
||||
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -185,30 +185,20 @@ func Init(db *sqlx.DB,
|
||||
configs map[string]interface{}) (*Authentication, error) {
|
||||
auth := &Authentication{}
|
||||
auth.db = db
|
||||
_, err := db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS user (
|
||||
username varchar(255) PRIMARY KEY NOT NULL,
|
||||
password varchar(255) DEFAULT NULL,
|
||||
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
|
||||
name varchar(255) DEFAULT NULL,
|
||||
roles varchar(255) NOT NULL DEFAULT "[]",
|
||||
email varchar(255) DEFAULT NULL,
|
||||
projects varchar(255) NOT NULL DEFAULT "[]");`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sessKey := os.Getenv("SESSION_KEY")
|
||||
if sessKey == "" {
|
||||
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||
bytes := make([]byte, 32)
|
||||
if _, err := rand.Read(bytes); err != nil {
|
||||
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
|
||||
return nil, err
|
||||
}
|
||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||
if err != nil {
|
||||
log.Error("Error while initializing authentication -> decoding session key failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
||||
@ -216,12 +206,14 @@ func Init(db *sqlx.DB,
|
||||
|
||||
auth.LocalAuth = &LocalAuthenticator{}
|
||||
if err := auth.LocalAuth.Init(auth, nil); err != nil {
|
||||
log.Error("Error while initializing authentication -> localAuth init failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
|
||||
|
||||
auth.JwtAuth = &JWTAuthenticator{}
|
||||
if err := auth.JwtAuth.Init(auth, configs["jwt"]); err != nil {
|
||||
log.Error("Error while initializing authentication -> jwtAuth init failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.authenticators = append(auth.authenticators, auth.JwtAuth)
|
||||
@ -229,6 +221,7 @@ func Init(db *sqlx.DB,
|
||||
if config, ok := configs["ldap"]; ok {
|
||||
auth.LdapAuth = &LdapAuthenticator{}
|
||||
if err := auth.LdapAuth.Init(auth, config); err != nil {
|
||||
log.Error("Error while initializing authentication -> ldapAuth init failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
|
||||
@ -243,6 +236,7 @@ func (auth *Authentication) AuthViaSession(
|
||||
|
||||
session, err := auth.sessionStore.Get(r, "session")
|
||||
if err != nil {
|
||||
log.Error("Error while getting session store")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -272,7 +266,7 @@ func (auth *Authentication) Login(
|
||||
user := (*User)(nil)
|
||||
if username != "" {
|
||||
if user, _ = auth.GetUser(username); err != nil {
|
||||
// log.Warnf("login of unkown user %#v", username)
|
||||
// log.Warnf("login of unkown user %v", username)
|
||||
_ = err
|
||||
}
|
||||
}
|
||||
@ -284,7 +278,7 @@ func (auth *Authentication) Login(
|
||||
|
||||
user, err = authenticator.Login(user, rw, r)
|
||||
if err != nil {
|
||||
log.Warnf("login failed: %s", err.Error())
|
||||
log.Warnf("user '%s' login failed: %s", user.Username, err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
@ -303,7 +297,7 @@ func (auth *Authentication) Login(
|
||||
session.Values["projects"] = user.Projects
|
||||
session.Values["roles"] = user.Roles
|
||||
if err := auth.sessionStore.Save(r, rw, session); err != nil {
|
||||
log.Errorf("session save failed: %s", err.Error())
|
||||
log.Warnf("session save failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
@ -45,11 +45,13 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||
if err != nil {
|
||||
log.Warn("Could not decode JWT public key")
|
||||
return err
|
||||
}
|
||||
ja.publicKey = ed25519.PublicKey(bytes)
|
||||
bytes, err = base64.StdEncoding.DecodeString(privKey)
|
||||
if err != nil {
|
||||
log.Warn("Could not decode JWT private key")
|
||||
return err
|
||||
}
|
||||
ja.privateKey = ed25519.PrivateKey(bytes)
|
||||
@ -58,6 +60,7 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
|
||||
if pubKey = os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||
if err != nil {
|
||||
log.Warn("Could not decode cross login JWT HS512 key")
|
||||
return err
|
||||
}
|
||||
ja.loginTokenKey = bytes
|
||||
@ -68,6 +71,7 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
|
||||
if keyFound && pubKeyCrossLogin != "" {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
|
||||
if err != nil {
|
||||
log.Warn("Could not decode cross login JWT public key")
|
||||
return err
|
||||
}
|
||||
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
|
||||
@ -123,13 +127,15 @@ func (ja *JWTAuthenticator) Login(
|
||||
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
|
||||
return ja.loginTokenKey, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
|
||||
return nil, fmt.Errorf("AUTH/JWT > unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
|
||||
})
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing jwt token")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := token.Claims.Valid(); err != nil {
|
||||
log.Warn("jwt token claims are not valid")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -151,6 +157,7 @@ func (ja *JWTAuthenticator) Login(
|
||||
if user == nil {
|
||||
user, err = ja.auth.GetUser(sub)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
log.Errorf("Error while loading user '%v'", sub)
|
||||
return nil, err
|
||||
} else if user == nil {
|
||||
user = &User{
|
||||
@ -159,6 +166,7 @@ func (ja *JWTAuthenticator) Login(
|
||||
AuthSource: AuthViaToken,
|
||||
}
|
||||
if err := ja.auth.AddUser(user); err != nil {
|
||||
log.Errorf("Error while adding user '%v' to auth from token", user.Username)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
@ -223,11 +231,13 @@ func (ja *JWTAuthenticator) Auth(
|
||||
return ja.publicKey, nil
|
||||
})
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing token")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check token validity
|
||||
if err := token.Claims.Valid(); err != nil {
|
||||
log.Warn("jwt token claims are not valid")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -276,7 +286,7 @@ func (ja *JWTAuthenticator) Auth(
|
||||
session.Values["roles"] = roles
|
||||
|
||||
if err := ja.auth.sessionStore.Save(r, rw, session); err != nil {
|
||||
log.Errorf("session save failed: %s", err.Error())
|
||||
log.Warnf("session save failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return nil, err
|
||||
}
|
||||
|
@ -39,21 +39,23 @@ func (la *LdapAuthenticator) Init(
|
||||
if la.config != nil && la.config.SyncInterval != "" {
|
||||
interval, err := time.ParseDuration(la.config.SyncInterval)
|
||||
if err != nil {
|
||||
log.Warnf("Could not parse duration for sync interval: %v", la.config.SyncInterval)
|
||||
return err
|
||||
}
|
||||
|
||||
if interval == 0 {
|
||||
log.Info("Sync interval is zero")
|
||||
return nil
|
||||
}
|
||||
|
||||
go func() {
|
||||
ticker := time.NewTicker(interval)
|
||||
for t := range ticker.C {
|
||||
log.Printf("LDAP sync started at %s", t.Format(time.RFC3339))
|
||||
log.Printf("sync started at %s", t.Format(time.RFC3339))
|
||||
if err := la.Sync(); err != nil {
|
||||
log.Errorf("LDAP sync failed: %s", err.Error())
|
||||
log.Errorf("sync failed: %s", err.Error())
|
||||
}
|
||||
log.Print("LDAP sync done")
|
||||
log.Print("sync done")
|
||||
}
|
||||
}()
|
||||
}
|
||||
@ -76,12 +78,14 @@ func (la *LdapAuthenticator) Login(
|
||||
|
||||
l, err := la.getLdapConnection(false)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting ldap connection")
|
||||
return nil, err
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
userDn := strings.Replace(la.config.UserBind, "{username}", user.Username, -1)
|
||||
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
|
||||
log.Error("Error while binding to ldap connection")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -104,12 +108,14 @@ func (la *LdapAuthenticator) Sync() error {
|
||||
users := map[string]int{}
|
||||
rows, err := la.auth.db.Query(`SELECT username FROM user WHERE user.ldap = 1`)
|
||||
if err != nil {
|
||||
log.Warn("Error while querying LDAP users")
|
||||
return err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var username string
|
||||
if err := rows.Scan(&username); err != nil {
|
||||
log.Warnf("Error while scanning for user '%s'", username)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -118,6 +124,7 @@ func (la *LdapAuthenticator) Sync() error {
|
||||
|
||||
l, err := la.getLdapConnection(true)
|
||||
if err != nil {
|
||||
log.Error("LDAP connection error")
|
||||
return err
|
||||
}
|
||||
defer l.Close()
|
||||
@ -126,6 +133,7 @@ func (la *LdapAuthenticator) Sync() error {
|
||||
la.config.UserBase, ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
|
||||
la.config.UserFilter, []string{"dn", "uid", "gecos"}, nil))
|
||||
if err != nil {
|
||||
log.Warn("LDAP search error")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -147,15 +155,17 @@ func (la *LdapAuthenticator) Sync() error {
|
||||
|
||||
for username, where := range users {
|
||||
if where == IN_DB && la.config.SyncDelOldUsers {
|
||||
log.Debugf("ldap-sync: remove %#v (does not show up in LDAP anymore)", username)
|
||||
log.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
|
||||
if _, err := la.auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username); err != nil {
|
||||
log.Errorf("User '%s' not in LDAP anymore: Delete from DB failed", username)
|
||||
return err
|
||||
}
|
||||
} else if where == IN_LDAP {
|
||||
name := newnames[username]
|
||||
log.Debugf("ldap-sync: add %#v (name: %#v, roles: [user], ldap: true)", username, name)
|
||||
log.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
|
||||
if _, err := la.auth.db.Exec(`INSERT INTO user (username, ldap, name, roles) VALUES (?, ?, ?, ?)`,
|
||||
username, 1, name, "[\""+RoleUser+"\"]"); err != nil {
|
||||
log.Errorf("User '%s' new in LDAP: Insert into DB failed", username)
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -170,12 +180,14 @@ func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
|
||||
|
||||
conn, err := ldap.DialURL(la.config.Url)
|
||||
if err != nil {
|
||||
log.Warn("LDAP URL dial failed")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if admin {
|
||||
if err := conn.Bind(la.config.SearchDN, la.syncPassword); err != nil {
|
||||
conn.Close()
|
||||
log.Warn("LDAP connection bind failed")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ func (la *LocalAuthenticator) Login(
|
||||
r *http.Request) (*User, error) {
|
||||
|
||||
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil {
|
||||
return nil, fmt.Errorf("user '%s' provided the wrong password (%w)", user.Username, e)
|
||||
return nil, fmt.Errorf("AUTH/LOCAL > user '%s' provided the wrong password (%w)", user.Username, e)
|
||||
}
|
||||
|
||||
return user, nil
|
||||
|
@ -25,6 +25,7 @@ func (auth *Authentication) GetUser(username string) (*User, error) {
|
||||
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
|
||||
Where("user.username = ?", username).RunWith(auth.db).
|
||||
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
|
||||
log.Warnf("Error while querying user '%v' from database", username)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -33,6 +34,7 @@ func (auth *Authentication) GetUser(username string) (*User, error) {
|
||||
user.Email = email.String
|
||||
if rawRoles.Valid {
|
||||
if err := json.Unmarshal([]byte(rawRoles.String), &user.Roles); err != nil {
|
||||
log.Warn("Error while unmarshaling raw roles from DB")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
@ -64,6 +66,7 @@ func (auth *Authentication) AddUser(user *User) error {
|
||||
if user.Password != "" {
|
||||
password, err := bcrypt.GenerateFromPassword([]byte(user.Password), bcrypt.DefaultCost)
|
||||
if err != nil {
|
||||
log.Error("Error while encrypting new user password")
|
||||
return err
|
||||
}
|
||||
cols = append(cols, "password")
|
||||
@ -71,6 +74,7 @@ func (auth *Authentication) AddUser(user *User) error {
|
||||
}
|
||||
|
||||
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(auth.db).Exec(); err != nil {
|
||||
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -81,6 +85,7 @@ func (auth *Authentication) AddUser(user *User) error {
|
||||
func (auth *Authentication) DelUser(username string) error {
|
||||
|
||||
_, err := auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username)
|
||||
log.Errorf("Error while deleting user '%s' from DB", username)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -93,6 +98,7 @@ func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
|
||||
|
||||
rows, err := q.RunWith(auth.db).Query()
|
||||
if err != nil {
|
||||
log.Warn("Error while querying user list")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -104,10 +110,12 @@ func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
|
||||
user := &User{}
|
||||
var name, email sql.NullString
|
||||
if err := rows.Scan(&user.Username, &name, &email, &rawroles, &rawprojects); err != nil {
|
||||
log.Warn("Error while scanning user list")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal([]byte(rawroles), &user.Roles); err != nil {
|
||||
log.Warn("Error while unmarshaling raw role list")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -129,11 +137,12 @@ func (auth *Authentication) AddRole(
|
||||
|
||||
user, err := auth.GetUser(username)
|
||||
if err != nil {
|
||||
log.Warnf("Could not load user '%s'", username)
|
||||
return err
|
||||
}
|
||||
|
||||
if !IsValidRole(role) {
|
||||
return fmt.Errorf("invalid user role: %#v", role)
|
||||
return fmt.Errorf("Invalid user role: %v", role)
|
||||
}
|
||||
|
||||
if user.HasRole(role) {
|
||||
@ -142,6 +151,7 @@ func (auth *Authentication) AddRole(
|
||||
|
||||
roles, _ := json.Marshal(append(user.Roles, role))
|
||||
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
|
||||
log.Errorf("Error while adding new role for user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
@ -150,11 +160,12 @@ func (auth *Authentication) AddRole(
|
||||
func (auth *Authentication) RemoveRole(ctx context.Context, username string, role string) error {
|
||||
user, err := auth.GetUser(username)
|
||||
if err != nil {
|
||||
log.Warnf("Could not load user '%s'", username)
|
||||
return err
|
||||
}
|
||||
|
||||
if !IsValidRole(role) {
|
||||
return fmt.Errorf("invalid user role: %#v", role)
|
||||
return fmt.Errorf("Invalid user role: %#v", role)
|
||||
}
|
||||
|
||||
if role == RoleManager && len(user.Projects) != 0 {
|
||||
@ -174,11 +185,12 @@ func (auth *Authentication) RemoveRole(ctx context.Context, username string, rol
|
||||
if exists == true {
|
||||
var mroles, _ = json.Marshal(newroles)
|
||||
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
|
||||
log.Errorf("Error while removing role for user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
} else {
|
||||
return fmt.Errorf("user %#v already does not have role %#v", username, role)
|
||||
return fmt.Errorf("User '%v' already does not have role: %v", username, role)
|
||||
}
|
||||
}
|
||||
|
||||
@ -259,9 +271,13 @@ func FetchUser(ctx context.Context, db *sqlx.DB, username string) (*model.User,
|
||||
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
|
||||
RunWith(db).QueryRow().Scan(&name, &email); err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */
|
||||
/* since FetchUser will be called to retrieve full name and mail for every job in query/list */
|
||||
// log.Warnf("User '%s' Not found in DB", username)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
log.Warnf("Error while fetching user '%s'", username)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@ func Init(flagConfigFile string) {
|
||||
raw, err := os.ReadFile(flagConfigFile)
|
||||
if err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("CONFIG ERROR: %v", err)
|
||||
}
|
||||
} else {
|
||||
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
|
||||
@ -58,7 +58,7 @@ func Init(flagConfigFile string) {
|
||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||
dec.DisallowUnknownFields()
|
||||
if err := dec.Decode(&Keys); err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("could not decode: %v", err)
|
||||
}
|
||||
|
||||
if Keys.Clusters == nil || len(Keys.Clusters) < 1 {
|
||||
|
@ -19,7 +19,7 @@ func TestInit(t *testing.T) {
|
||||
func TestInitMinimal(t *testing.T) {
|
||||
fp := "../../docs/config.json"
|
||||
Init(fp)
|
||||
if Keys.Addr != "0.0.0.0:8080" {
|
||||
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:8080", Keys.Addr)
|
||||
if Keys.Addr != "127.0.0.1:8080" {
|
||||
t.Errorf("wrong addr\ngot: %s \nwant: 127.0.0.1:8080", Keys.Addr)
|
||||
}
|
||||
}
|
||||
|
@ -88,6 +88,7 @@ type ComplexityRoot struct {
|
||||
Exclusive func(childComplexity int) int
|
||||
ID func(childComplexity int) int
|
||||
JobID func(childComplexity int) int
|
||||
JobName func(childComplexity int) int
|
||||
MetaData func(childComplexity int) int
|
||||
MonitoringStatus func(childComplexity int) int
|
||||
NumAcc func(childComplexity int) int
|
||||
@ -130,6 +131,7 @@ type ComplexityRoot struct {
|
||||
HistDuration func(childComplexity int) int
|
||||
HistNumNodes func(childComplexity int) int
|
||||
ID func(childComplexity int) int
|
||||
Name func(childComplexity int) int
|
||||
ShortJobs func(childComplexity int) int
|
||||
TotalCoreHours func(childComplexity int) int
|
||||
TotalJobs func(childComplexity int) int
|
||||
@ -263,6 +265,8 @@ type ClusterResolver interface {
|
||||
Partitions(ctx context.Context, obj *schema.Cluster) ([]string, error)
|
||||
}
|
||||
type JobResolver interface {
|
||||
JobName(ctx context.Context, obj *schema.Job) (*string, error)
|
||||
|
||||
Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)
|
||||
|
||||
MetaData(ctx context.Context, obj *schema.Job) (interface{}, error)
|
||||
@ -452,6 +456,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
|
||||
|
||||
return e.complexity.Job.JobID(childComplexity), true
|
||||
|
||||
case "Job.jobName":
|
||||
if e.complexity.Job.JobName == nil {
|
||||
break
|
||||
}
|
||||
|
||||
return e.complexity.Job.JobName(childComplexity), true
|
||||
|
||||
case "Job.metaData":
|
||||
if e.complexity.Job.MetaData == nil {
|
||||
break
|
||||
@ -662,6 +673,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
|
||||
|
||||
return e.complexity.JobsStatistics.ID(childComplexity), true
|
||||
|
||||
case "JobsStatistics.name":
|
||||
if e.complexity.JobsStatistics.Name == nil {
|
||||
break
|
||||
}
|
||||
|
||||
return e.complexity.JobsStatistics.Name(childComplexity), true
|
||||
|
||||
case "JobsStatistics.shortJobs":
|
||||
if e.complexity.JobsStatistics.ShortJobs == nil {
|
||||
break
|
||||
@ -1391,6 +1409,7 @@ type Job {
|
||||
jobId: Int!
|
||||
user: String!
|
||||
project: String!
|
||||
jobName: String
|
||||
cluster: String!
|
||||
subCluster: String!
|
||||
startTime: Time!
|
||||
@ -1578,14 +1597,15 @@ type IntRangeOutput { from: Int!, to: Int! }
|
||||
type TimeRangeOutput { from: Time!, to: Time! }
|
||||
|
||||
input JobFilter {
|
||||
tags: [ID!]
|
||||
jobId: StringInput
|
||||
arrayJobId: Int
|
||||
user: StringInput
|
||||
project: StringInput
|
||||
cluster: StringInput
|
||||
partition: StringInput
|
||||
duration: IntRange
|
||||
tags: [ID!]
|
||||
jobId: StringInput
|
||||
arrayJobId: Int
|
||||
user: StringInput
|
||||
project: StringInput
|
||||
jobName: StringInput
|
||||
cluster: StringInput
|
||||
partition: StringInput
|
||||
duration: IntRange
|
||||
|
||||
minRunningFor: Int
|
||||
|
||||
@ -1616,6 +1636,7 @@ input StringInput {
|
||||
contains: String
|
||||
startsWith: String
|
||||
endsWith: String
|
||||
in: [String!]
|
||||
}
|
||||
|
||||
input IntRange { from: Int!, to: Int! }
|
||||
@ -1636,6 +1657,7 @@ type HistoPoint {
|
||||
|
||||
type JobsStatistics {
|
||||
id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster
|
||||
name: String # if User-Statistics: Given Name of Account (ID) Owner
|
||||
totalJobs: Int! # Number of jobs that matched
|
||||
shortJobs: Int! # Number of jobs with a duration of less than 2 minutes
|
||||
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||
@ -3038,6 +3060,47 @@ func (ec *executionContext) fieldContext_Job_project(ctx context.Context, field
|
||||
return fc, nil
|
||||
}
|
||||
|
||||
func (ec *executionContext) _Job_jobName(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
|
||||
fc, err := ec.fieldContext_Job_jobName(ctx, field)
|
||||
if err != nil {
|
||||
return graphql.Null
|
||||
}
|
||||
ctx = graphql.WithFieldContext(ctx, fc)
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
ec.Error(ctx, ec.Recover(ctx, r))
|
||||
ret = graphql.Null
|
||||
}
|
||||
}()
|
||||
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
|
||||
ctx = rctx // use context from middleware stack in children
|
||||
return ec.resolvers.Job().JobName(rctx, obj)
|
||||
})
|
||||
if err != nil {
|
||||
ec.Error(ctx, err)
|
||||
return graphql.Null
|
||||
}
|
||||
if resTmp == nil {
|
||||
return graphql.Null
|
||||
}
|
||||
res := resTmp.(*string)
|
||||
fc.Result = res
|
||||
return ec.marshalOString2ᚖstring(ctx, field.Selections, res)
|
||||
}
|
||||
|
||||
func (ec *executionContext) fieldContext_Job_jobName(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
|
||||
fc = &graphql.FieldContext{
|
||||
Object: "Job",
|
||||
Field: field,
|
||||
IsMethod: true,
|
||||
IsResolver: true,
|
||||
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
|
||||
return nil, errors.New("field of type String does not have child fields")
|
||||
},
|
||||
}
|
||||
return fc, nil
|
||||
}
|
||||
|
||||
func (ec *executionContext) _Job_cluster(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
|
||||
fc, err := ec.fieldContext_Job_cluster(ctx, field)
|
||||
if err != nil {
|
||||
@ -4231,6 +4294,8 @@ func (ec *executionContext) fieldContext_JobResultList_items(ctx context.Context
|
||||
return ec.fieldContext_Job_user(ctx, field)
|
||||
case "project":
|
||||
return ec.fieldContext_Job_project(ctx, field)
|
||||
case "jobName":
|
||||
return ec.fieldContext_Job_jobName(ctx, field)
|
||||
case "cluster":
|
||||
return ec.fieldContext_Job_cluster(ctx, field)
|
||||
case "subCluster":
|
||||
@ -4441,6 +4506,47 @@ func (ec *executionContext) fieldContext_JobsStatistics_id(ctx context.Context,
|
||||
return fc, nil
|
||||
}
|
||||
|
||||
func (ec *executionContext) _JobsStatistics_name(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
|
||||
fc, err := ec.fieldContext_JobsStatistics_name(ctx, field)
|
||||
if err != nil {
|
||||
return graphql.Null
|
||||
}
|
||||
ctx = graphql.WithFieldContext(ctx, fc)
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
ec.Error(ctx, ec.Recover(ctx, r))
|
||||
ret = graphql.Null
|
||||
}
|
||||
}()
|
||||
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
|
||||
ctx = rctx // use context from middleware stack in children
|
||||
return obj.Name, nil
|
||||
})
|
||||
if err != nil {
|
||||
ec.Error(ctx, err)
|
||||
return graphql.Null
|
||||
}
|
||||
if resTmp == nil {
|
||||
return graphql.Null
|
||||
}
|
||||
res := resTmp.(*string)
|
||||
fc.Result = res
|
||||
return ec.marshalOString2ᚖstring(ctx, field.Selections, res)
|
||||
}
|
||||
|
||||
func (ec *executionContext) fieldContext_JobsStatistics_name(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
|
||||
fc = &graphql.FieldContext{
|
||||
Object: "JobsStatistics",
|
||||
Field: field,
|
||||
IsMethod: false,
|
||||
IsResolver: false,
|
||||
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
|
||||
return nil, errors.New("field of type String does not have child fields")
|
||||
},
|
||||
}
|
||||
return fc, nil
|
||||
}
|
||||
|
||||
func (ec *executionContext) _JobsStatistics_totalJobs(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
|
||||
fc, err := ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
|
||||
if err != nil {
|
||||
@ -6078,6 +6184,8 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
|
||||
return ec.fieldContext_Job_user(ctx, field)
|
||||
case "project":
|
||||
return ec.fieldContext_Job_project(ctx, field)
|
||||
case "jobName":
|
||||
return ec.fieldContext_Job_jobName(ctx, field)
|
||||
case "cluster":
|
||||
return ec.fieldContext_Job_cluster(ctx, field)
|
||||
case "subCluster":
|
||||
@ -6357,6 +6465,8 @@ func (ec *executionContext) fieldContext_Query_jobsStatistics(ctx context.Contex
|
||||
switch field.Name {
|
||||
case "id":
|
||||
return ec.fieldContext_JobsStatistics_id(ctx, field)
|
||||
case "name":
|
||||
return ec.fieldContext_JobsStatistics_name(ctx, field)
|
||||
case "totalJobs":
|
||||
return ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
|
||||
case "shortJobs":
|
||||
@ -10389,7 +10499,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj int
|
||||
asMap[k] = v
|
||||
}
|
||||
|
||||
fieldsInOrder := [...]string{"tags", "jobId", "arrayJobId", "user", "project", "cluster", "partition", "duration", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "flopsAnyAvg", "memBwAvg", "loadAvg", "memUsedMax"}
|
||||
fieldsInOrder := [...]string{"tags", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "flopsAnyAvg", "memBwAvg", "loadAvg", "memUsedMax"}
|
||||
for _, k := range fieldsInOrder {
|
||||
v, ok := asMap[k]
|
||||
if !ok {
|
||||
@ -10436,6 +10546,14 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj int
|
||||
if err != nil {
|
||||
return it, err
|
||||
}
|
||||
case "jobName":
|
||||
var err error
|
||||
|
||||
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("jobName"))
|
||||
it.JobName, err = ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
|
||||
if err != nil {
|
||||
return it, err
|
||||
}
|
||||
case "cluster":
|
||||
var err error
|
||||
|
||||
@ -10629,7 +10747,7 @@ func (ec *executionContext) unmarshalInputStringInput(ctx context.Context, obj i
|
||||
asMap[k] = v
|
||||
}
|
||||
|
||||
fieldsInOrder := [...]string{"eq", "contains", "startsWith", "endsWith"}
|
||||
fieldsInOrder := [...]string{"eq", "contains", "startsWith", "endsWith", "in"}
|
||||
for _, k := range fieldsInOrder {
|
||||
v, ok := asMap[k]
|
||||
if !ok {
|
||||
@ -10668,6 +10786,14 @@ func (ec *executionContext) unmarshalInputStringInput(ctx context.Context, obj i
|
||||
if err != nil {
|
||||
return it, err
|
||||
}
|
||||
case "in":
|
||||
var err error
|
||||
|
||||
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("in"))
|
||||
it.In, err = ec.unmarshalOString2ᚕstringᚄ(ctx, v)
|
||||
if err != nil {
|
||||
return it, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -11000,6 +11126,23 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
|
||||
if out.Values[i] == graphql.Null {
|
||||
atomic.AddUint32(&invalids, 1)
|
||||
}
|
||||
case "jobName":
|
||||
field := field
|
||||
|
||||
innerFunc := func(ctx context.Context) (res graphql.Marshaler) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
ec.Error(ctx, ec.Recover(ctx, r))
|
||||
}
|
||||
}()
|
||||
res = ec._Job_jobName(ctx, field, obj)
|
||||
return res
|
||||
}
|
||||
|
||||
out.Concurrently(i, func() graphql.Marshaler {
|
||||
return innerFunc(ctx)
|
||||
|
||||
})
|
||||
case "cluster":
|
||||
|
||||
out.Values[i] = ec._Job_cluster(ctx, field, obj)
|
||||
@ -11312,6 +11455,10 @@ func (ec *executionContext) _JobsStatistics(ctx context.Context, sel ast.Selecti
|
||||
if out.Values[i] == graphql.Null {
|
||||
invalids++
|
||||
}
|
||||
case "name":
|
||||
|
||||
out.Values[i] = ec._JobsStatistics_name(ctx, field, obj)
|
||||
|
||||
case "totalJobs":
|
||||
|
||||
out.Values[i] = ec._JobsStatistics_totalJobs(ctx, field, obj)
|
||||
|
@ -42,6 +42,7 @@ type JobFilter struct {
|
||||
ArrayJobID *int `json:"arrayJobId"`
|
||||
User *StringInput `json:"user"`
|
||||
Project *StringInput `json:"project"`
|
||||
JobName *StringInput `json:"jobName"`
|
||||
Cluster *StringInput `json:"cluster"`
|
||||
Partition *StringInput `json:"partition"`
|
||||
Duration *schema.IntRange `json:"duration"`
|
||||
@ -71,6 +72,7 @@ type JobResultList struct {
|
||||
|
||||
type JobsStatistics struct {
|
||||
ID string `json:"id"`
|
||||
Name *string `json:"name"`
|
||||
TotalJobs int `json:"totalJobs"`
|
||||
ShortJobs int `json:"shortJobs"`
|
||||
TotalWalltime int `json:"totalWalltime"`
|
||||
@ -101,10 +103,11 @@ type PageRequest struct {
|
||||
}
|
||||
|
||||
type StringInput struct {
|
||||
Eq *string `json:"eq"`
|
||||
Contains *string `json:"contains"`
|
||||
StartsWith *string `json:"startsWith"`
|
||||
EndsWith *string `json:"endsWith"`
|
||||
Eq *string `json:"eq"`
|
||||
Contains *string `json:"contains"`
|
||||
StartsWith *string `json:"startsWith"`
|
||||
EndsWith *string `json:"endsWith"`
|
||||
In []string `json:"in"`
|
||||
}
|
||||
|
||||
type TimeRangeOutput struct {
|
||||
|
@ -16,6 +16,7 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
@ -24,6 +25,11 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
|
||||
return r.Repo.Partitions(obj.Name)
|
||||
}
|
||||
|
||||
// JobName is the resolver for the jobName field.
|
||||
func (r *jobResolver) JobName(ctx context.Context, obj *schema.Job) (*string, error) {
|
||||
return r.Repo.FetchJobName(obj)
|
||||
}
|
||||
|
||||
// Tags is the resolver for the tags field.
|
||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||
return r.Repo.GetTags(&obj.ID)
|
||||
@ -43,6 +49,7 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
|
||||
id, err := r.Repo.CreateTag(typeArg, name)
|
||||
if err != nil {
|
||||
log.Warn("Error while creating tag")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -58,6 +65,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
|
||||
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||
jid, err := strconv.ParseInt(job, 10, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while adding tag to job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -65,10 +73,12 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
||||
for _, tagId := range tagIds {
|
||||
tid, err := strconv.ParseInt(tagId, 10, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing tag id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if tags, err = r.Repo.AddTag(jid, tid); err != nil {
|
||||
log.Warn("Error while adding tag")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
@ -80,6 +90,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
||||
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||
jid, err := strconv.ParseInt(job, 10, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing job id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -87,10 +98,12 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
||||
for _, tagId := range tagIds {
|
||||
tid, err := strconv.ParseInt(tagId, 10, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing tag id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
|
||||
log.Warn("Error while removing tag")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
@ -101,6 +114,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
||||
// UpdateConfiguration is the resolver for the updateConfiguration field.
|
||||
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
|
||||
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, auth.GetUser(ctx)); err != nil {
|
||||
log.Warn("Error while updating user config")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -126,6 +140,7 @@ func (r *queryResolver) User(ctx context.Context, username string) (*model.User,
|
||||
func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) {
|
||||
data, err := r.Repo.AllocatedNodes(cluster)
|
||||
if err != nil {
|
||||
log.Warn("Error while fetching allocated nodes")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -144,11 +159,13 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
|
||||
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
|
||||
numericId, err := strconv.ParseInt(id, 10, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing job id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
job, err := r.Repo.FindById(numericId)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -163,11 +180,13 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
|
||||
job, err := r.Query().Job(ctx, id)
|
||||
if err != nil {
|
||||
log.Warn("Error while querying job for metrics")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -175,7 +194,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
|
||||
for name, md := range data {
|
||||
for scope, metric := range md {
|
||||
if metric.Scope != schema.MetricScope(scope) {
|
||||
panic("WTF?")
|
||||
log.Panic("metric.Scope != schema.MetricScope(scope) : Should not happen!")
|
||||
}
|
||||
|
||||
res = append(res, &model.JobMetricWithName{
|
||||
@ -204,11 +223,13 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
|
||||
|
||||
jobs, err := r.Repo.QueryJobs(ctx, filter, page, order)
|
||||
if err != nil {
|
||||
log.Warn("Error while querying jobs")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
count, err := r.Repo.CountJobs(ctx, filter)
|
||||
if err != nil {
|
||||
log.Warn("Error while counting jobs")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -217,13 +238,14 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
|
||||
|
||||
// JobsStatistics is the resolver for the jobsStatistics field.
|
||||
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
||||
return r.jobsStatistics(ctx, filter, groupBy)
|
||||
return r.Repo.JobsStatistics(ctx, filter, groupBy)
|
||||
}
|
||||
|
||||
// JobsCount is the resolver for the jobsCount field.
|
||||
func (r *queryResolver) JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, weight *model.Weights, limit *int) ([]*model.Count, error) {
|
||||
counts, err := r.Repo.CountGroupedJobs(ctx, groupBy, filter, weight, limit)
|
||||
if err != nil {
|
||||
log.Warn("Error while counting grouped jobs")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -257,6 +279,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
||||
|
||||
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading node data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
@ -6,217 +6,16 @@ package graph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// GraphQL validation should make sure that no unkown values can be specified.
|
||||
var groupBy2column = map[model.Aggregate]string{
|
||||
model.AggregateUser: "job.user",
|
||||
model.AggregateProject: "job.project",
|
||||
model.AggregateCluster: "job.cluster",
|
||||
}
|
||||
|
||||
const ShortJobDuration int = 5 * 60
|
||||
|
||||
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||
func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
||||
// In case `groupBy` is nil (not used), the model.JobsStatistics used is at the key '' (empty string)
|
||||
stats := map[string]*model.JobsStatistics{}
|
||||
|
||||
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
|
||||
for _, cluster := range archive.Clusters {
|
||||
for _, subcluster := range cluster.SubClusters {
|
||||
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", subcluster.SocketsPerNode, subcluster.CoresPerSocket)
|
||||
var rawQuery sq.SelectBuilder
|
||||
if groupBy == nil {
|
||||
rawQuery = sq.Select(
|
||||
"''",
|
||||
"COUNT(job.id)",
|
||||
"CAST(ROUND(SUM(job.duration) / 3600) as int)",
|
||||
corehoursCol,
|
||||
).From("job")
|
||||
} else {
|
||||
col := groupBy2column[*groupBy]
|
||||
rawQuery = sq.Select(
|
||||
col,
|
||||
"COUNT(job.id)",
|
||||
"CAST(ROUND(SUM(job.duration) / 3600) as int)",
|
||||
corehoursCol,
|
||||
).From("job").GroupBy(col)
|
||||
}
|
||||
|
||||
rawQuery = rawQuery.
|
||||
Where("job.cluster = ?", cluster.Name).
|
||||
Where("job.subcluster = ?", subcluster.Name)
|
||||
|
||||
query, qerr := repository.SecurityCheck(ctx, rawQuery)
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filter {
|
||||
query = repository.BuildWhereClause(f, query)
|
||||
}
|
||||
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id sql.NullString
|
||||
var jobs, walltime, corehours sql.NullInt64
|
||||
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
if s, ok := stats[id.String]; ok {
|
||||
s.TotalJobs += int(jobs.Int64)
|
||||
s.TotalWalltime += int(walltime.Int64)
|
||||
s.TotalCoreHours += int(corehours.Int64)
|
||||
} else {
|
||||
stats[id.String] = &model.JobsStatistics{
|
||||
ID: id.String,
|
||||
TotalJobs: int(jobs.Int64),
|
||||
TotalWalltime: int(walltime.Int64),
|
||||
TotalCoreHours: int(corehours.Int64),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if groupBy == nil {
|
||||
|
||||
query, qerr := repository.SecurityCheck(ctx, sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration))
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filter {
|
||||
query = repository.BuildWhereClause(f, query)
|
||||
}
|
||||
if err := query.RunWith(r.DB).QueryRow().Scan(&(stats[""].ShortJobs)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
col := groupBy2column[*groupBy]
|
||||
|
||||
query, qerr := repository.SecurityCheck(ctx, sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration))
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filter {
|
||||
query = repository.BuildWhereClause(f, query)
|
||||
}
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id sql.NullString
|
||||
var shortJobs sql.NullInt64
|
||||
if err := rows.Scan(&id, &shortJobs); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
stats[id.String].ShortJobs = int(shortJobs.Int64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculating the histogram data is expensive, so only do it if needed.
|
||||
// An explicit resolver can not be used because we need to know the filters.
|
||||
histogramsNeeded := false
|
||||
fields := graphql.CollectFieldsCtx(ctx, nil)
|
||||
for _, col := range fields {
|
||||
if col.Name == "histDuration" || col.Name == "histNumNodes" {
|
||||
histogramsNeeded = true
|
||||
}
|
||||
}
|
||||
|
||||
res := make([]*model.JobsStatistics, 0, len(stats))
|
||||
for _, stat := range stats {
|
||||
res = append(res, stat)
|
||||
id, col := "", ""
|
||||
if groupBy != nil {
|
||||
id = stat.ID
|
||||
col = groupBy2column[*groupBy]
|
||||
}
|
||||
|
||||
if histogramsNeeded {
|
||||
var err error
|
||||
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as int) as value`, time.Now().Unix())
|
||||
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter, id, col)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
|
||||
// to add a condition to the query of the kind "<col> = <id>".
|
||||
func (r *queryResolver) jobsStatisticsHistogram(ctx context.Context, value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
|
||||
|
||||
query, qerr := repository.SecurityCheck(ctx, sq.Select(value, "COUNT(job.id) AS count").From("job"))
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filters {
|
||||
query = repository.BuildWhereClause(f, query)
|
||||
}
|
||||
|
||||
if len(id) != 0 && len(col) != 0 {
|
||||
query = query.Where(col+" = ?", id)
|
||||
}
|
||||
|
||||
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
points := make([]*model.HistoPoint, 0)
|
||||
for rows.Next() {
|
||||
point := model.HistoPoint{}
|
||||
if err := rows.Scan(&point.Value, &point.Count); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
points = append(points, &point)
|
||||
}
|
||||
return points, nil
|
||||
}
|
||||
|
||||
const MAX_JOBS_FOR_ANALYSIS = 500
|
||||
|
||||
// Helper function for the rooflineHeatmap GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||
@ -228,10 +27,11 @@ func (r *queryResolver) rooflineHeatmap(
|
||||
|
||||
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
||||
if err != nil {
|
||||
log.Error("Error while querying jobs for roofline")
|
||||
return nil, err
|
||||
}
|
||||
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
|
||||
return nil, fmt.Errorf("too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
|
||||
return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
|
||||
}
|
||||
|
||||
fcols, frows := float64(cols), float64(rows)
|
||||
@ -248,19 +48,20 @@ func (r *queryResolver) rooflineHeatmap(
|
||||
|
||||
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
||||
if err != nil {
|
||||
log.Error("Error while loading metrics for roofline")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
|
||||
if flops_ == nil && membw_ == nil {
|
||||
return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
||||
return nil, fmt.Errorf("GRAPH/STATS > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
||||
}
|
||||
|
||||
flops, ok1 := flops_["node"]
|
||||
membw, ok2 := membw_["node"]
|
||||
if !ok1 || !ok2 {
|
||||
// TODO/FIXME:
|
||||
return nil, errors.New("todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
|
||||
return nil, errors.New("GRAPH/STATS > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
|
||||
}
|
||||
|
||||
for n := 0; n < len(flops.Series); n++ {
|
||||
@ -292,10 +93,11 @@ func (r *queryResolver) rooflineHeatmap(
|
||||
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
||||
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
||||
if err != nil {
|
||||
log.Error("Error while querying jobs for footprint")
|
||||
return nil, err
|
||||
}
|
||||
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
|
||||
return nil, fmt.Errorf("too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
|
||||
return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
|
||||
}
|
||||
|
||||
avgs := make([][]schema.Float, len(metrics))
|
||||
@ -310,6 +112,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
|
||||
}
|
||||
|
||||
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
log.Error("Error while loading averages for footprint")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
@ -78,6 +79,7 @@ func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
|
||||
|
||||
var config CCMetricStoreConfig
|
||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
||||
log.Warn("Error while unmarshaling raw json config")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -124,11 +126,13 @@ func (ccms *CCMetricStore) doRequest(
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
if err := json.NewEncoder(buf).Encode(body); err != nil {
|
||||
log.Warn("Error while encoding request body")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf)
|
||||
if err != nil {
|
||||
log.Warn("Error while building request body")
|
||||
return nil, err
|
||||
}
|
||||
if ccms.jwt != "" {
|
||||
@ -137,6 +141,7 @@ func (ccms *CCMetricStore) doRequest(
|
||||
|
||||
res, err := ccms.client.Do(req)
|
||||
if err != nil {
|
||||
log.Error("Error while performing request")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -146,6 +151,7 @@ func (ccms *CCMetricStore) doRequest(
|
||||
|
||||
var resBody ApiQueryResponse
|
||||
if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil {
|
||||
log.Warn("Error while decoding result body")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -161,6 +167,7 @@ func (ccms *CCMetricStore) LoadData(
|
||||
topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
|
||||
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
|
||||
if err != nil {
|
||||
log.Warn("Error while building queries")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -175,6 +182,7 @@ func (ccms *CCMetricStore) LoadData(
|
||||
|
||||
resBody, err := ccms.doRequest(ctx, &req)
|
||||
if err != nil {
|
||||
log.Error("Error while performing request")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -202,6 +210,7 @@ func (ccms *CCMetricStore) LoadData(
|
||||
|
||||
for _, res := range row {
|
||||
if res.Error != nil {
|
||||
/* Build list for "partial errors", if any */
|
||||
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
|
||||
continue
|
||||
}
|
||||
@ -245,7 +254,8 @@ func (ccms *CCMetricStore) LoadData(
|
||||
}
|
||||
|
||||
if len(errors) != 0 {
|
||||
return jobData, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", "))
|
||||
/* Returns list for "partial errors" */
|
||||
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
|
||||
}
|
||||
|
||||
return jobData, nil
|
||||
@ -272,8 +282,8 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
remoteName := ccms.toRemoteName(metric)
|
||||
mc := archive.GetMetricConfig(job.Cluster, metric)
|
||||
if mc == nil {
|
||||
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||
// log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||
log.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||
continue
|
||||
}
|
||||
|
||||
@ -483,7 +493,7 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
continue
|
||||
}
|
||||
|
||||
return nil, nil, fmt.Errorf("TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
|
||||
return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -498,6 +508,7 @@ func (ccms *CCMetricStore) LoadStats(
|
||||
|
||||
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode})
|
||||
if err != nil {
|
||||
log.Warn("Error while building query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -512,6 +523,7 @@ func (ccms *CCMetricStore) LoadStats(
|
||||
|
||||
resBody, err := ccms.doRequest(ctx, &req)
|
||||
if err != nil {
|
||||
log.Error("Error while performing request")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -521,7 +533,7 @@ func (ccms *CCMetricStore) LoadStats(
|
||||
metric := ccms.toLocalName(query.Metric)
|
||||
data := res[0]
|
||||
if data.Error != nil {
|
||||
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
|
||||
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
|
||||
}
|
||||
|
||||
metricdata, ok := stats[metric]
|
||||
@ -531,7 +543,7 @@ func (ccms *CCMetricStore) LoadStats(
|
||||
}
|
||||
|
||||
if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() {
|
||||
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
|
||||
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
|
||||
}
|
||||
|
||||
metricdata[query.Hostname] = schema.MetricStatistics{
|
||||
@ -577,6 +589,7 @@ func (ccms *CCMetricStore) LoadNodeData(
|
||||
|
||||
resBody, err := ccms.doRequest(ctx, &req)
|
||||
if err != nil {
|
||||
log.Error("Error while performing request")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -593,11 +606,12 @@ func (ccms *CCMetricStore) LoadNodeData(
|
||||
metric := ccms.toLocalName(query.Metric)
|
||||
qdata := res[0]
|
||||
if qdata.Error != nil {
|
||||
/* Build list for "partial errors", if any */
|
||||
errors = append(errors, fmt.Sprintf("fetching %s for node %s failed: %s", metric, query.Hostname, *qdata.Error))
|
||||
}
|
||||
|
||||
if qdata.Avg.IsNaN() || qdata.Min.IsNaN() || qdata.Max.IsNaN() {
|
||||
// return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
|
||||
// return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
|
||||
qdata.Avg, qdata.Min, qdata.Max = 0., 0., 0.
|
||||
}
|
||||
|
||||
@ -627,7 +641,8 @@ func (ccms *CCMetricStore) LoadNodeData(
|
||||
}
|
||||
|
||||
if len(errors) != 0 {
|
||||
return data, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", "))
|
||||
/* Returns list of "partial errors" */
|
||||
return data, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
|
||||
}
|
||||
|
||||
return data, nil
|
||||
|
@ -10,11 +10,11 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||
@ -37,6 +37,7 @@ type InfluxDBv2DataRepository struct {
|
||||
func (idb *InfluxDBv2DataRepository) Init(rawConfig json.RawMessage) error {
|
||||
var config InfluxDBv2DataRepositoryConfig
|
||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
||||
log.Warn("Error while unmarshaling raw json config")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -71,7 +72,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
for _, h := range job.Resources {
|
||||
if h.HWThreads != nil || h.Accelerators != nil {
|
||||
// TODO
|
||||
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
||||
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
||||
}
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
|
||||
}
|
||||
@ -84,7 +85,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
switch scope {
|
||||
case "node":
|
||||
// Get Finest Granularity, Groupy By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows
|
||||
// log.Println("Note: Scope 'node' requested. ")
|
||||
// log.Info("Scope 'node' requested. ")
|
||||
query = fmt.Sprintf(`
|
||||
from(bucket: "%s")
|
||||
|> range(start: %s, stop: %s)
|
||||
@ -97,10 +98,10 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
|
||||
measurementsCond, hostsCond)
|
||||
case "socket":
|
||||
log.Println("Note: Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
|
||||
log.Info("Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
|
||||
continue
|
||||
case "core":
|
||||
log.Println("Note: Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
|
||||
log.Info(" Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
|
||||
continue
|
||||
// Get Finest Granularity only, Set NULL to 0.0
|
||||
// query = fmt.Sprintf(`
|
||||
@ -114,13 +115,14 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
|
||||
// measurementsCond, hostsCond)
|
||||
default:
|
||||
log.Println("Note: Unknown Scope requested: Will return 'node' scope. ")
|
||||
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
|
||||
continue
|
||||
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node'")
|
||||
// return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support other scopes than 'node'")
|
||||
}
|
||||
|
||||
rows, err := idb.queryClient.Query(ctx, query)
|
||||
if err != nil {
|
||||
log.Error("Error while performing query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -192,6 +194,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
// hostSeries.Data = append(hostSeries.Data, schema.Float(val))
|
||||
// }
|
||||
default:
|
||||
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
|
||||
continue
|
||||
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'")
|
||||
}
|
||||
@ -202,21 +205,22 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
// Get Stats
|
||||
stats, err := idb.LoadStats(job, metrics, ctx)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading statistics")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, scope := range scopes {
|
||||
if scope == "node" { // No 'socket/core' support yet
|
||||
for metric, nodes := range stats {
|
||||
// log.Println(fmt.Sprintf("<< Add Stats for : Field %s >>", metric))
|
||||
// log.Debugf("<< Add Stats for : Field %s >>", metric)
|
||||
for node, stats := range nodes {
|
||||
// log.Println(fmt.Sprintf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg ))
|
||||
// log.Debugf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg )
|
||||
for index, _ := range jobData[metric][scope].Series {
|
||||
// log.Println(fmt.Sprintf("<< Try to add Stats to Series in Position %d >>", index))
|
||||
// log.Debugf("<< Try to add Stats to Series in Position %d >>", index)
|
||||
if jobData[metric][scope].Series[index].Hostname == node {
|
||||
// log.Println(fmt.Sprintf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname))
|
||||
// log.Debugf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname)
|
||||
jobData[metric][scope].Series[index].Statistics = &schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
|
||||
// log.Println(fmt.Sprintf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg))
|
||||
// log.Debugf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -228,9 +232,9 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
// for _, scope := range scopes {
|
||||
// for _, met := range metrics {
|
||||
// for _, series := range jobData[met][scope].Series {
|
||||
// log.Println(fmt.Sprintf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
|
||||
// log.Debugf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
|
||||
// len(series.Data), met, series.Hostname, scope,
|
||||
// series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg))
|
||||
// series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
@ -249,7 +253,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
|
||||
for _, h := range job.Resources {
|
||||
if h.HWThreads != nil || h.Accelerators != nil {
|
||||
// TODO
|
||||
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
||||
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
||||
}
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
|
||||
}
|
||||
@ -258,7 +262,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
|
||||
// lenMet := len(metrics)
|
||||
|
||||
for _, metric := range metrics {
|
||||
// log.Println(fmt.Sprintf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet))
|
||||
// log.Debugf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)
|
||||
|
||||
query := fmt.Sprintf(`
|
||||
data = from(bucket: "%s")
|
||||
@ -275,6 +279,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
|
||||
|
||||
rows, err := idb.queryClient.Query(ctx, query)
|
||||
if err != nil {
|
||||
log.Error("Error while performing query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -285,17 +290,17 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
|
||||
|
||||
avg, avgok := row.ValueByKey("avg").(float64)
|
||||
if !avgok {
|
||||
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg))
|
||||
// log.Debugf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)
|
||||
avg = 0.0
|
||||
}
|
||||
min, minok := row.ValueByKey("min").(float64)
|
||||
if !minok {
|
||||
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min))
|
||||
// log.Debugf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)
|
||||
min = 0.0
|
||||
}
|
||||
max, maxok := row.ValueByKey("max").(float64)
|
||||
if !maxok {
|
||||
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max))
|
||||
// log.Debugf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)
|
||||
max = 0.0
|
||||
}
|
||||
|
||||
@ -319,7 +324,7 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
|
||||
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
|
||||
// TODO : Implement to be used in Analysis- und System/Node-View
|
||||
log.Println(fmt.Sprintf("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes))
|
||||
log.Infof("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes)
|
||||
|
||||
return nil, errors.New("unimplemented for InfluxDBv2DataRepository")
|
||||
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
|
||||
}
|
||||
|
@ -46,6 +46,7 @@ func Init(disableArchive bool) error {
|
||||
Kind string `json:"kind"`
|
||||
}
|
||||
if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil {
|
||||
log.Warn("Error while unmarshaling raw json MetricDataRepository")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -60,10 +61,11 @@ func Init(disableArchive bool) error {
|
||||
case "test":
|
||||
mdr = &TestMetricDataRepository{}
|
||||
default:
|
||||
return fmt.Errorf("unkown metric data repository '%s' for cluster '%s'", kind.Kind, cluster.Name)
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > Unknown MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
|
||||
}
|
||||
|
||||
if err := mdr.Init(cluster.MetricDataRepository); err != nil {
|
||||
log.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
|
||||
return err
|
||||
}
|
||||
metricDataRepos[cluster.Name] = mdr
|
||||
@ -90,7 +92,7 @@ func LoadData(job *schema.Job,
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
|
||||
if !ok {
|
||||
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster), 0, 0
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
|
||||
}
|
||||
|
||||
if scopes == nil {
|
||||
@ -107,8 +109,9 @@ func LoadData(job *schema.Job,
|
||||
jd, err = repo.LoadData(job, metrics, scopes, ctx)
|
||||
if err != nil {
|
||||
if len(jd) != 0 {
|
||||
log.Errorf("partial error: %s", err.Error())
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading job data from metric repository")
|
||||
return err, 0, 0
|
||||
}
|
||||
}
|
||||
@ -116,6 +119,7 @@ func LoadData(job *schema.Job,
|
||||
} else {
|
||||
jd, err = archive.GetHandle().LoadJobData(job)
|
||||
if err != nil {
|
||||
log.Error("Error while loading job data from archive")
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
@ -163,6 +167,7 @@ func LoadData(job *schema.Job,
|
||||
})
|
||||
|
||||
if err, ok := data.(error); ok {
|
||||
log.Error("Error in returned dataset")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -182,11 +187,12 @@ func LoadAverages(
|
||||
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
if !ok {
|
||||
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
stats, err := repo.LoadStats(job, metrics, ctx)
|
||||
if err != nil {
|
||||
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -217,7 +223,7 @@ func LoadNodeData(
|
||||
|
||||
repo, ok := metricDataRepos[cluster]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no metric data repository configured for '%s'", cluster)
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
@ -229,14 +235,15 @@ func LoadNodeData(
|
||||
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
log.Errorf("partial error: %s", err.Error())
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading node data from metric repository")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
return nil, fmt.Errorf("the metric data repository for '%s' does not support this query", cluster)
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
@ -303,6 +310,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
|
||||
jobData, err := LoadData(job, allMetrics, scopes, ctx)
|
||||
if err != nil {
|
||||
log.Error("Error wile loading job data for archiving")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
@ -5,46 +5,46 @@
|
||||
package metricdata
|
||||
|
||||
import (
|
||||
"os"
|
||||
"errors"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
"bytes"
|
||||
"net/http"
|
||||
"time"
|
||||
"math"
|
||||
"sort"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
promapi "github.com/prometheus/client_golang/api"
|
||||
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
|
||||
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
|
||||
promcfg "github.com/prometheus/common/config"
|
||||
promm "github.com/prometheus/common/model"
|
||||
)
|
||||
|
||||
type PrometheusDataRepositoryConfig struct {
|
||||
Url string `json:"url"`
|
||||
Username string `json:"username,omitempty"`
|
||||
Suffix string `json:"suffix,omitempty"`
|
||||
Templates map[string]string `json:"query-templates"`
|
||||
Url string `json:"url"`
|
||||
Username string `json:"username,omitempty"`
|
||||
Suffix string `json:"suffix,omitempty"`
|
||||
Templates map[string]string `json:"query-templates"`
|
||||
}
|
||||
|
||||
type PrometheusDataRepository struct {
|
||||
client promapi.Client
|
||||
queryClient promv1.API
|
||||
suffix string
|
||||
templates map[string]*template.Template
|
||||
client promapi.Client
|
||||
queryClient promv1.API
|
||||
suffix string
|
||||
templates map[string]*template.Template
|
||||
}
|
||||
|
||||
type PromQLArgs struct {
|
||||
Nodes string
|
||||
Nodes string
|
||||
}
|
||||
|
||||
type Trie map[rune]Trie
|
||||
@ -60,10 +60,9 @@ func contains(s []schema.MetricScope, str schema.MetricScope) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
func MinMaxMean(data []schema.Float) (float64, float64, float64) {
|
||||
if len(data) == 0 {
|
||||
return 0.0, 0.0, 0.0
|
||||
return 0.0, 0.0, 0.0
|
||||
}
|
||||
min := math.MaxFloat64
|
||||
max := -math.MaxFloat64
|
||||
@ -75,85 +74,92 @@ func MinMaxMean(data []schema.Float) (float64, float64, float64) {
|
||||
}
|
||||
sum += float64(val)
|
||||
n += 1
|
||||
if float64(val) > max {max = float64(val)}
|
||||
if float64(val) < min {min = float64(val)}
|
||||
if float64(val) > max {
|
||||
max = float64(val)
|
||||
}
|
||||
if float64(val) < min {
|
||||
min = float64(val)
|
||||
}
|
||||
}
|
||||
return min, max, sum / n
|
||||
}
|
||||
|
||||
|
||||
// Rewritten from
|
||||
// https://github.com/ermanh/trieregex/blob/master/trieregex/trieregex.py
|
||||
func nodeRegex(nodes []string) string {
|
||||
root := Trie{}
|
||||
// add runes of each compute node to trie
|
||||
for _, node := range nodes {
|
||||
_trie := root
|
||||
for _, c := range node {
|
||||
if _, ok := _trie[c]; !ok {_trie[c] = Trie{}}
|
||||
_trie = _trie[c]
|
||||
}
|
||||
_trie['*'] = Trie{}
|
||||
}
|
||||
// recursively build regex from rune trie
|
||||
var trieRegex func(trie Trie, reset bool) string
|
||||
trieRegex = func(trie Trie, reset bool) string {
|
||||
if reset == true {
|
||||
trie = root
|
||||
}
|
||||
if len(trie) == 0 {
|
||||
return ""
|
||||
}
|
||||
if len(trie) == 1 {
|
||||
for key, _trie := range trie {
|
||||
if key == '*' { return "" }
|
||||
return regexp.QuoteMeta(string(key)) + trieRegex(_trie, false)
|
||||
}
|
||||
} else {
|
||||
sequences := []string{}
|
||||
for key, _trie := range trie {
|
||||
if key != '*' {
|
||||
sequences = append(sequences, regexp.QuoteMeta(string(key)) + trieRegex(_trie, false))
|
||||
}
|
||||
}
|
||||
sort.Slice(sequences, func(i, j int) bool {
|
||||
return (-len(sequences[i]) < -len(sequences[j])) || (sequences[i] < sequences[j])
|
||||
})
|
||||
var result string
|
||||
// single edge from this tree node
|
||||
if len(sequences) == 1 {
|
||||
result = sequences[0]
|
||||
if len(result) > 1 {
|
||||
result = "(?:" + result + ")"
|
||||
}
|
||||
// multiple edges, each length 1
|
||||
} else if s := strings.Join(sequences, ""); len(s) == len(sequences) {
|
||||
// char or numeric range
|
||||
if len(s)-1 == int(s[len(s)-1]) - int(s[0]) {
|
||||
result = fmt.Sprintf("[%c-%c]", s[0], s[len(s)-1])
|
||||
// char or numeric set
|
||||
} else {
|
||||
result = "[" + s + "]"
|
||||
}
|
||||
// multiple edges of different lengths
|
||||
} else {
|
||||
result = "(?:" + strings.Join(sequences, "|") + ")"
|
||||
}
|
||||
if _, ok := trie['*']; ok { result += "?"}
|
||||
return result
|
||||
}
|
||||
return ""
|
||||
}
|
||||
return trieRegex(root, true)
|
||||
root := Trie{}
|
||||
// add runes of each compute node to trie
|
||||
for _, node := range nodes {
|
||||
_trie := root
|
||||
for _, c := range node {
|
||||
if _, ok := _trie[c]; !ok {
|
||||
_trie[c] = Trie{}
|
||||
}
|
||||
_trie = _trie[c]
|
||||
}
|
||||
_trie['*'] = Trie{}
|
||||
}
|
||||
// recursively build regex from rune trie
|
||||
var trieRegex func(trie Trie, reset bool) string
|
||||
trieRegex = func(trie Trie, reset bool) string {
|
||||
if reset == true {
|
||||
trie = root
|
||||
}
|
||||
if len(trie) == 0 {
|
||||
return ""
|
||||
}
|
||||
if len(trie) == 1 {
|
||||
for key, _trie := range trie {
|
||||
if key == '*' {
|
||||
return ""
|
||||
}
|
||||
return regexp.QuoteMeta(string(key)) + trieRegex(_trie, false)
|
||||
}
|
||||
} else {
|
||||
sequences := []string{}
|
||||
for key, _trie := range trie {
|
||||
if key != '*' {
|
||||
sequences = append(sequences, regexp.QuoteMeta(string(key))+trieRegex(_trie, false))
|
||||
}
|
||||
}
|
||||
sort.Slice(sequences, func(i, j int) bool {
|
||||
return (-len(sequences[i]) < -len(sequences[j])) || (sequences[i] < sequences[j])
|
||||
})
|
||||
var result string
|
||||
// single edge from this tree node
|
||||
if len(sequences) == 1 {
|
||||
result = sequences[0]
|
||||
if len(result) > 1 {
|
||||
result = "(?:" + result + ")"
|
||||
}
|
||||
// multiple edges, each length 1
|
||||
} else if s := strings.Join(sequences, ""); len(s) == len(sequences) {
|
||||
// char or numeric range
|
||||
if len(s)-1 == int(s[len(s)-1])-int(s[0]) {
|
||||
result = fmt.Sprintf("[%c-%c]", s[0], s[len(s)-1])
|
||||
// char or numeric set
|
||||
} else {
|
||||
result = "[" + s + "]"
|
||||
}
|
||||
// multiple edges of different lengths
|
||||
} else {
|
||||
result = "(?:" + strings.Join(sequences, "|") + ")"
|
||||
}
|
||||
if _, ok := trie['*']; ok {
|
||||
result += "?"
|
||||
}
|
||||
return result
|
||||
}
|
||||
return ""
|
||||
}
|
||||
return trieRegex(root, true)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
|
||||
var config PrometheusDataRepositoryConfig
|
||||
// parse config
|
||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
||||
log.Warn("Error while unmarshaling raw json config")
|
||||
return err
|
||||
}
|
||||
// support basic authentication
|
||||
@ -163,15 +169,16 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
|
||||
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
|
||||
} else {
|
||||
if config.Username != "" {
|
||||
return errors.New("Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
|
||||
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
|
||||
}
|
||||
}
|
||||
// init client
|
||||
client, err := promapi.NewClient(promapi.Config{
|
||||
Address: config.Url,
|
||||
Address: config.Url,
|
||||
RoundTripper: rt,
|
||||
})
|
||||
if err != nil {
|
||||
log.Error("Error while initializing new prometheus client")
|
||||
return err
|
||||
}
|
||||
// init query client
|
||||
@ -186,15 +193,12 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
|
||||
if err == nil {
|
||||
log.Debugf("Added PromQL template for %s: %s", metric, templ)
|
||||
} else {
|
||||
log.Errorf("Failed to parse PromQL template %s for metric %s", templ, metric)
|
||||
log.Warnf("Failed to parse PromQL template %s for metric %s", templ, metric)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// TODO: respect scope argument
|
||||
func (pdb *PrometheusDataRepository) FormatQuery(
|
||||
metric string,
|
||||
@ -213,53 +217,47 @@ func (pdb *PrometheusDataRepository) FormatQuery(
|
||||
if templ, ok := pdb.templates[metric]; ok {
|
||||
err := templ.Execute(buf, args)
|
||||
if err != nil {
|
||||
return "", errors.New(fmt.Sprintf("Error compiling template %v", templ))
|
||||
return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > Error compiling template %v", templ))
|
||||
} else {
|
||||
query := buf.String()
|
||||
log.Debugf(fmt.Sprintf("PromQL: %s", query))
|
||||
log.Debugf("PromQL: %s", query)
|
||||
return query, nil
|
||||
}
|
||||
} else {
|
||||
return "", errors.New(fmt.Sprintf("No PromQL for metric %s configured.", metric))
|
||||
return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > No PromQL for metric %s configured.", metric))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Convert PromAPI row to CC schema.Series
|
||||
func (pdb *PrometheusDataRepository) RowToSeries(
|
||||
from time.Time,
|
||||
step int64,
|
||||
steps int64,
|
||||
row *promm.SampleStream) (schema.Series) {
|
||||
ts := from.Unix()
|
||||
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
|
||||
// init array of expected length with NaN
|
||||
values := make([]schema.Float, steps + 1)
|
||||
for i, _ := range values {
|
||||
values[i] = schema.NaN
|
||||
}
|
||||
// copy recorded values from prom sample pair
|
||||
for _, v := range row.Values {
|
||||
idx := (v.Timestamp.Unix() - ts) / step
|
||||
values[idx] = schema.Float(v.Value)
|
||||
}
|
||||
min, max, mean := MinMaxMean(values)
|
||||
// output struct
|
||||
return schema.Series{
|
||||
Hostname: hostname,
|
||||
Data: values,
|
||||
Statistics: &schema.MetricStatistics{
|
||||
Avg: mean,
|
||||
Min: min,
|
||||
Max: max,
|
||||
},
|
||||
}
|
||||
from time.Time,
|
||||
step int64,
|
||||
steps int64,
|
||||
row *promm.SampleStream) schema.Series {
|
||||
ts := from.Unix()
|
||||
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
|
||||
// init array of expected length with NaN
|
||||
values := make([]schema.Float, steps+1)
|
||||
for i, _ := range values {
|
||||
values[i] = schema.NaN
|
||||
}
|
||||
|
||||
|
||||
|
||||
// copy recorded values from prom sample pair
|
||||
for _, v := range row.Values {
|
||||
idx := (v.Timestamp.Unix() - ts) / step
|
||||
values[idx] = schema.Float(v.Value)
|
||||
}
|
||||
min, max, mean := MinMaxMean(values)
|
||||
// output struct
|
||||
return schema.Series{
|
||||
Hostname: hostname,
|
||||
Data: values,
|
||||
Statistics: &schema.MetricStatistics{
|
||||
Avg: mean,
|
||||
Min: min,
|
||||
Max: max,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (pdb *PrometheusDataRepository) LoadData(
|
||||
job *schema.Job,
|
||||
@ -268,7 +266,7 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
|
||||
// TODO respect requested scope
|
||||
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode){
|
||||
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
|
||||
scopes = append(scopes, schema.MetricScopeNode)
|
||||
}
|
||||
|
||||
@ -283,36 +281,38 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
|
||||
for _, scope := range scopes {
|
||||
if scope != schema.MetricScopeNode {
|
||||
logOnce.Do(func(){log.Infof(fmt.Sprintf("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope))})
|
||||
logOnce.Do(func() {
|
||||
log.Infof("Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
for _, metric := range metrics {
|
||||
metricConfig := archive.GetMetricConfig(job.Cluster, metric)
|
||||
if metricConfig == nil {
|
||||
log.Errorf(fmt.Sprintf("Error in LoadData: Metric %s for cluster %s not configured",
|
||||
metric, job.Cluster))
|
||||
return nil, errors.New("Prometheus querry error")
|
||||
log.Warnf("Error in LoadData: Metric %s for cluster %s not configured", metric, job.Cluster)
|
||||
return nil, errors.New("Prometheus config error")
|
||||
}
|
||||
query, err := pdb.FormatQuery(metric, scope, nodes, job.Cluster)
|
||||
if err != nil {
|
||||
log.Warn("Error while formatting prometheus query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// ranged query over all job nodes
|
||||
r := promv1.Range{
|
||||
Start: from,
|
||||
End: to,
|
||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||
}
|
||||
Start: from,
|
||||
End: to,
|
||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||
}
|
||||
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(fmt.Sprintf("Prometheus query error in LoadData: %v\nQuery: %s", err, query))
|
||||
return nil, errors.New("Prometheus querry error")
|
||||
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
|
||||
return nil, errors.New("Prometheus query error")
|
||||
}
|
||||
if len(warnings) > 0 {
|
||||
log.Warnf(fmt.Sprintf("Warnings: %v\n", warnings))
|
||||
log.Warnf("Warnings: %v\n", warnings)
|
||||
}
|
||||
|
||||
// init data structures
|
||||
@ -338,16 +338,13 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
}
|
||||
// sort by hostname to get uniform coloring
|
||||
sort.Slice(jobMetric.Series, func(i, j int) bool {
|
||||
return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname)
|
||||
return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname)
|
||||
})
|
||||
}
|
||||
}
|
||||
return jobData, nil
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// TODO change implementation to precomputed/cached stats
|
||||
func (pdb *PrometheusDataRepository) LoadStats(
|
||||
job *schema.Job,
|
||||
@ -359,6 +356,7 @@ func (pdb *PrometheusDataRepository) LoadStats(
|
||||
|
||||
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job for stats")
|
||||
return nil, err
|
||||
}
|
||||
for metric, metricData := range data {
|
||||
@ -371,9 +369,6 @@ func (pdb *PrometheusDataRepository) LoadStats(
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
func (pdb *PrometheusDataRepository) LoadNodeData(
|
||||
cluster string,
|
||||
metrics, nodes []string,
|
||||
@ -390,35 +385,37 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
||||
}
|
||||
for _, scope := range scopes {
|
||||
if scope != schema.MetricScopeNode {
|
||||
logOnce.Do(func(){log.Infof(fmt.Sprintf("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope))})
|
||||
logOnce.Do(func() {
|
||||
log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
|
||||
})
|
||||
continue
|
||||
}
|
||||
for _, metric := range metrics {
|
||||
metricConfig := archive.GetMetricConfig(cluster, metric)
|
||||
if metricConfig == nil {
|
||||
log.Errorf(fmt.Sprintf("Error in LoadNodeData: Metric %s for cluster %s not configured",
|
||||
metric, cluster))
|
||||
return nil, errors.New("Prometheus querry error")
|
||||
log.Warnf("Error in LoadNodeData: Metric %s for cluster %s not configured", metric, cluster)
|
||||
return nil, errors.New("Prometheus config error")
|
||||
}
|
||||
query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
|
||||
if err != nil {
|
||||
log.Warn("Error while formatting prometheus query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// ranged query over all nodes
|
||||
r := promv1.Range{
|
||||
Start: from,
|
||||
End: to,
|
||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||
}
|
||||
Start: from,
|
||||
End: to,
|
||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||
}
|
||||
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(fmt.Sprintf("Prometheus query error in LoadNodeData: %v\n", err))
|
||||
return nil, errors.New("Prometheus querry error")
|
||||
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
|
||||
return nil, errors.New("Prometheus query error")
|
||||
}
|
||||
if len(warnings) > 0 {
|
||||
log.Warnf(fmt.Sprintf("Warnings: %v\n", warnings))
|
||||
log.Warnf("Warnings: %v\n", warnings)
|
||||
}
|
||||
|
||||
step := int64(metricConfig.Timestep)
|
||||
@ -437,13 +434,13 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
||||
Unit: metricConfig.Unit,
|
||||
Scope: scope,
|
||||
Timestep: metricConfig.Timestep,
|
||||
Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)},
|
||||
},
|
||||
Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)},
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
t1 := time.Since(t0)
|
||||
log.Debugf(fmt.Sprintf("LoadNodeData of %v nodes took %s", len(data), t1))
|
||||
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
|
||||
return data, nil
|
||||
}
|
||||
|
@ -5,12 +5,15 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/mattn/go-sqlite3"
|
||||
"github.com/qustavo/sqlhooks/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -19,7 +22,8 @@ var (
|
||||
)
|
||||
|
||||
type DBConnection struct {
|
||||
DB *sqlx.DB
|
||||
DB *sqlx.DB
|
||||
Driver string
|
||||
}
|
||||
|
||||
func Connect(driver string, db string) {
|
||||
@ -28,7 +32,9 @@ func Connect(driver string, db string) {
|
||||
|
||||
dbConnOnce.Do(func() {
|
||||
if driver == "sqlite3" {
|
||||
dbHandle, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", db))
|
||||
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
|
||||
dbHandle, err = sqlx.Open("sqlite3WithHooks", fmt.Sprintf("%s?_foreign_keys=on", db))
|
||||
// dbHandle, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", db))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
@ -39,7 +45,7 @@ func Connect(driver string, db string) {
|
||||
} else if driver == "mysql" {
|
||||
dbHandle, err = sqlx.Open("mysql", fmt.Sprintf("%s?multiStatements=true", db))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("sqlx.Open() error: %v", err)
|
||||
}
|
||||
|
||||
dbHandle.SetConnMaxLifetime(time.Minute * 3)
|
||||
@ -49,7 +55,8 @@ func Connect(driver string, db string) {
|
||||
log.Fatalf("unsupported database driver: %s", driver)
|
||||
}
|
||||
|
||||
dbConnInstance = &DBConnection{DB: dbHandle}
|
||||
dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
|
||||
checkDBVersion(driver, dbHandle.DB)
|
||||
})
|
||||
}
|
||||
|
||||
|
28
internal/repository/hooks.go
Normal file
28
internal/repository/hooks.go
Normal file
@ -0,0 +1,28 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
)
|
||||
|
||||
// Hooks satisfies the sqlhook.Hooks interface
|
||||
type Hooks struct{}
|
||||
|
||||
// Before hook will print the query with it's args and return the context with the timestamp
|
||||
func (h *Hooks) Before(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
|
||||
log.Infof("SQL query %s %q", query, args)
|
||||
return context.WithValue(ctx, "begin", time.Now()), nil
|
||||
}
|
||||
|
||||
// After hook will get the timestamp registered on the Before hook and print the elapsed time
|
||||
func (h *Hooks) After(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
|
||||
begin := ctx.Value("begin").(time.Time)
|
||||
log.Infof("Took: %s\n", time.Since(begin))
|
||||
return ctx, nil
|
||||
}
|
@ -19,67 +19,6 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
// `AUTO_INCREMENT` is in a comment because of this hack:
|
||||
// https://stackoverflow.com/a/41028314 (sqlite creates unique ids automatically)
|
||||
const JobsDBSchema string = `
|
||||
DROP TABLE IF EXISTS jobtag;
|
||||
DROP TABLE IF EXISTS job;
|
||||
DROP TABLE IF EXISTS tag;
|
||||
|
||||
CREATE TABLE job (
|
||||
id INTEGER PRIMARY KEY /*!40101 AUTO_INCREMENT */,
|
||||
job_id BIGINT NOT NULL,
|
||||
cluster VARCHAR(255) NOT NULL,
|
||||
subcluster VARCHAR(255) NOT NULL,
|
||||
start_time BIGINT NOT NULL, -- Unix timestamp
|
||||
|
||||
user VARCHAR(255) NOT NULL,
|
||||
project VARCHAR(255) NOT NULL,
|
||||
` + "`partition`" + ` VARCHAR(255) NOT NULL, -- partition is a keyword in mysql -.-
|
||||
array_job_id BIGINT NOT NULL,
|
||||
duration INT NOT NULL DEFAULT 0,
|
||||
walltime INT NOT NULL DEFAULT 0,
|
||||
job_state VARCHAR(255) NOT NULL CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')),
|
||||
meta_data TEXT, -- JSON
|
||||
resources TEXT NOT NULL, -- JSON
|
||||
|
||||
num_nodes INT NOT NULL,
|
||||
num_hwthreads INT NOT NULL,
|
||||
num_acc INT NOT NULL,
|
||||
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
|
||||
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
|
||||
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
|
||||
|
||||
mem_used_max REAL NOT NULL DEFAULT 0.0,
|
||||
flops_any_avg REAL NOT NULL DEFAULT 0.0,
|
||||
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
load_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
|
||||
file_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
|
||||
|
||||
CREATE TABLE tag (
|
||||
id INTEGER PRIMARY KEY,
|
||||
tag_type VARCHAR(255) NOT NULL,
|
||||
tag_name VARCHAR(255) NOT NULL,
|
||||
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
|
||||
|
||||
CREATE TABLE jobtag (
|
||||
job_id INTEGER,
|
||||
tag_id INTEGER,
|
||||
PRIMARY KEY (job_id, tag_id),
|
||||
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
|
||||
`
|
||||
|
||||
// Indexes are created after the job-archive is traversed for faster inserts.
|
||||
const JobsDbIndexes string = `
|
||||
CREATE INDEX job_by_user ON job (user);
|
||||
CREATE INDEX job_by_starttime ON job (start_time);
|
||||
CREATE INDEX job_by_job_id ON job (job_id);
|
||||
CREATE INDEX job_by_state ON job (job_state);
|
||||
`
|
||||
const NamedJobInsert string = `INSERT INTO job (
|
||||
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
|
||||
@ -95,40 +34,44 @@ func HandleImportFlag(flag string) error {
|
||||
for _, pair := range strings.Split(flag, ",") {
|
||||
files := strings.Split(pair, ":")
|
||||
if len(files) != 2 {
|
||||
return fmt.Errorf("invalid import flag format")
|
||||
return fmt.Errorf("REPOSITORY/INIT > invalid import flag format")
|
||||
}
|
||||
|
||||
raw, err := os.ReadFile(files[0])
|
||||
if err != nil {
|
||||
log.Warn("Error while reading metadata file for import")
|
||||
return err
|
||||
}
|
||||
|
||||
if config.Keys.Validate {
|
||||
if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil {
|
||||
return fmt.Errorf("validate job meta: %v", err)
|
||||
return fmt.Errorf("REPOSITORY/INIT > validate job meta: %v", err)
|
||||
}
|
||||
}
|
||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||
dec.DisallowUnknownFields()
|
||||
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err := dec.Decode(&jobMeta); err != nil {
|
||||
log.Warn("Error while decoding raw json metadata for import")
|
||||
return err
|
||||
}
|
||||
|
||||
raw, err = os.ReadFile(files[1])
|
||||
if err != nil {
|
||||
log.Warn("Error while reading jobdata file for import")
|
||||
return err
|
||||
}
|
||||
|
||||
if config.Keys.Validate {
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(raw)); err != nil {
|
||||
return fmt.Errorf("validate job data: %v", err)
|
||||
return fmt.Errorf("REPOSITORY/INIT > validate job data: %v", err)
|
||||
}
|
||||
}
|
||||
dec = json.NewDecoder(bytes.NewReader(raw))
|
||||
dec.DisallowUnknownFields()
|
||||
jobData := schema.JobData{}
|
||||
if err := dec.Decode(&jobData); err != nil {
|
||||
log.Warn("Error while decoding raw json jobdata for import")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -136,10 +79,11 @@ func HandleImportFlag(flag string) error {
|
||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
if job, err := GetJobRepository().Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job in jobRepository")
|
||||
return err
|
||||
}
|
||||
|
||||
return fmt.Errorf("a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
|
||||
return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
|
||||
}
|
||||
|
||||
job := schema.Job{
|
||||
@ -155,38 +99,45 @@ func HandleImportFlag(flag string) error {
|
||||
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
log.Warn("Error while marshaling job resources")
|
||||
return err
|
||||
}
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
log.Warn("Error while marshaling job metadata")
|
||||
return err
|
||||
}
|
||||
|
||||
if err := SanityChecks(&job.BaseJob); err != nil {
|
||||
log.Warn("BaseJob SanityChecks failed")
|
||||
return err
|
||||
}
|
||||
|
||||
if err := archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
|
||||
log.Error("Error while importing job")
|
||||
return err
|
||||
}
|
||||
|
||||
res, err := GetConnection().DB.NamedExec(NamedJobInsert, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while NamedJobInsert")
|
||||
return err
|
||||
}
|
||||
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
return err
|
||||
}
|
||||
|
||||
for _, tag := range job.Tags {
|
||||
if _, err := GetJobRepository().AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
||||
log.Error("Error while adding or creating tag")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
|
||||
log.Infof("successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -198,21 +149,17 @@ func InitDB() error {
|
||||
starttime := time.Now()
|
||||
log.Print("Building job table...")
|
||||
|
||||
// Basic database structure:
|
||||
_, err := db.DB.Exec(JobsDBSchema)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Inserts are bundled into transactions because in sqlite,
|
||||
// that speeds up inserts A LOT.
|
||||
tx, err := db.DB.Beginx()
|
||||
if err != nil {
|
||||
log.Warn("Error while bundling transactions")
|
||||
return err
|
||||
}
|
||||
|
||||
stmt, err := tx.PrepareNamed(NamedJobInsert)
|
||||
if err != nil {
|
||||
log.Warn("Error while preparing namedJobInsert")
|
||||
return err
|
||||
}
|
||||
tags := make(map[string]int64)
|
||||
@ -232,12 +179,14 @@ func InitDB() error {
|
||||
if i%10 == 0 {
|
||||
if tx != nil {
|
||||
if err := tx.Commit(); err != nil {
|
||||
log.Warn("Error while committing transactions for jobMeta")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
tx, err = db.DB.Beginx()
|
||||
if err != nil {
|
||||
log.Warn("Error while bundling transactions for jobMeta")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -260,34 +209,34 @@ func InitDB() error {
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB()- %v", err)
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB()- %v", err)
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
if err := SanityChecks(&job.BaseJob); err != nil {
|
||||
log.Errorf("repository initDB()- %v", err)
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
res, err := stmt.Exec(job)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB()- %v", err)
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB()- %v", err)
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
@ -298,16 +247,19 @@ func InitDB() error {
|
||||
if !ok {
|
||||
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
|
||||
if err != nil {
|
||||
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
|
||||
return err
|
||||
}
|
||||
tagId, err = res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
return err
|
||||
}
|
||||
tags[tagstr] = tagId
|
||||
}
|
||||
|
||||
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
|
||||
log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", id, tagId)
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -318,16 +270,11 @@ func InitDB() error {
|
||||
}
|
||||
|
||||
if errorOccured > 0 {
|
||||
log.Errorf("Error in import of %d jobs!", errorOccured)
|
||||
log.Warnf("Error in import of %d jobs!", errorOccured)
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create indexes after inserts so that they do not
|
||||
// need to be continually updated.
|
||||
if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
|
||||
log.Warn("Error while committing SQL transactions")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -338,13 +285,14 @@ func InitDB() error {
|
||||
// This function also sets the subcluster if necessary!
|
||||
func SanityChecks(job *schema.BaseJob) error {
|
||||
if c := archive.GetCluster(job.Cluster); c == nil {
|
||||
return fmt.Errorf("no such cluster: %#v", job.Cluster)
|
||||
return fmt.Errorf("no such cluster: %v", job.Cluster)
|
||||
}
|
||||
if err := archive.AssignSubCluster(job); err != nil {
|
||||
log.Warn("Error while assigning subcluster to job")
|
||||
return err
|
||||
}
|
||||
if !job.State.Valid() {
|
||||
return fmt.Errorf("not a valid job state: %#v", job.State)
|
||||
return fmt.Errorf("not a valid job state: %v", job.State)
|
||||
}
|
||||
if len(job.Resources) == 0 || len(job.User) == 0 {
|
||||
return fmt.Errorf("'resources' and 'user' should not be empty")
|
||||
|
@ -14,9 +14,11 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
@ -30,7 +32,8 @@ var (
|
||||
)
|
||||
|
||||
type JobRepository struct {
|
||||
DB *sqlx.DB
|
||||
DB *sqlx.DB
|
||||
driver string
|
||||
|
||||
stmtCache *sq.StmtCache
|
||||
cache *lrucache.Cache
|
||||
@ -44,9 +47,11 @@ func GetJobRepository() *JobRepository {
|
||||
db := GetConnection()
|
||||
|
||||
jobRepoInstance = &JobRepository{
|
||||
DB: db.DB,
|
||||
stmtCache: sq.NewStmtCache(db.DB),
|
||||
cache: lrucache.New(1024 * 1024),
|
||||
DB: db.DB,
|
||||
driver: db.Driver,
|
||||
|
||||
stmtCache: sq.NewStmtCache(db.DB),
|
||||
cache: lrucache.New(1024 * 1024),
|
||||
archiveChannel: make(chan *schema.Job, 128),
|
||||
}
|
||||
// start archiving worker
|
||||
@ -67,14 +72,20 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
||||
if err := row.Scan(
|
||||
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
|
||||
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
|
||||
&job.Duration, &job.Walltime, &job.RawResources /*&job.MetaData*/); err != nil {
|
||||
&job.Duration, &job.Walltime, &job.RawResources /*&job.RawMetaData*/); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
|
||||
log.Warn("Error while unmarhsaling raw resources json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
|
||||
// return nil, err
|
||||
// }
|
||||
|
||||
job.StartTime = time.Unix(job.StartTimeUnix, 0)
|
||||
if job.Duration == 0 && job.State == schema.JobStateRunning {
|
||||
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
||||
@ -84,11 +95,14 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
||||
return job, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
|
||||
func (r *JobRepository) FetchJobName(job *schema.Job) (*string, error) {
|
||||
start := time.Now()
|
||||
cachekey := fmt.Sprintf("metadata:%d", job.ID)
|
||||
if cached := r.cache.Get(cachekey, nil); cached != nil {
|
||||
job.MetaData = cached.(map[string]string)
|
||||
return job.MetaData, nil
|
||||
if jobName := job.MetaData["jobName"]; jobName != "" {
|
||||
return &jobName, nil
|
||||
}
|
||||
}
|
||||
|
||||
if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
|
||||
@ -105,6 +119,40 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
|
||||
}
|
||||
|
||||
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
|
||||
log.Infof("Timer FetchJobName %s", time.Since(start))
|
||||
|
||||
if jobName := job.MetaData["jobName"]; jobName != "" {
|
||||
return &jobName, nil
|
||||
} else {
|
||||
return new(string), nil
|
||||
}
|
||||
}
|
||||
|
||||
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
|
||||
start := time.Now()
|
||||
cachekey := fmt.Sprintf("metadata:%d", job.ID)
|
||||
if cached := r.cache.Get(cachekey, nil); cached != nil {
|
||||
job.MetaData = cached.(map[string]string)
|
||||
return job.MetaData, nil
|
||||
}
|
||||
|
||||
if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&job.RawMetaData); err != nil {
|
||||
log.Warn("Error while scanning for job metadata")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(job.RawMetaData) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
|
||||
log.Warn("Error while unmarshaling raw metadata json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
|
||||
log.Infof("Timer FetchMetadata %s", time.Since(start))
|
||||
return job.MetaData, nil
|
||||
}
|
||||
|
||||
@ -113,6 +161,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
||||
r.cache.Del(cachekey)
|
||||
if job.MetaData == nil {
|
||||
if _, err = r.FetchMetadata(job); err != nil {
|
||||
log.Warnf("Error while fetching metadata for job, DB ID '%v'", job.ID)
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -129,10 +178,12 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
||||
}
|
||||
|
||||
if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
|
||||
log.Warnf("Error while marshaling metadata for job, DB ID '%v'", job.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
|
||||
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -150,6 +201,7 @@ func (r *JobRepository) Find(
|
||||
cluster *string,
|
||||
startTime *int64) (*schema.Job, error) {
|
||||
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
@ -160,6 +212,7 @@ func (r *JobRepository) Find(
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
}
|
||||
|
||||
log.Infof("Timer Find %s", time.Since(start))
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
@ -173,6 +226,7 @@ func (r *JobRepository) FindAll(
|
||||
cluster *string,
|
||||
startTime *int64) ([]*schema.Job, error) {
|
||||
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
@ -185,6 +239,7 @@ func (r *JobRepository) FindAll(
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -192,10 +247,12 @@ func (r *JobRepository) FindAll(
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
log.Infof("Timer FindAll %s", time.Since(start))
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
@ -214,12 +271,12 @@ func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
|
||||
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("encoding resources field failed: %w", err)
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("encoding metaData field failed: %w", err)
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
|
||||
}
|
||||
|
||||
res, err := r.DB.NamedExec(`INSERT INTO job (
|
||||
@ -259,7 +316,7 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
err := r.DB.Get(&cnt, qs) //ignore error as it will also occur in delete statement
|
||||
_, err = r.DB.Exec(`DELETE FROM job WHERE job.start_time < ?`, startTime)
|
||||
if err != nil {
|
||||
log.Warnf(" DeleteJobsBefore(%d): error %v", startTime, err)
|
||||
log.Errorf(" DeleteJobsBefore(%d): error %#v", startTime, err)
|
||||
} else {
|
||||
log.Infof("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
|
||||
}
|
||||
@ -269,7 +326,7 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
func (r *JobRepository) DeleteJobById(id int64) error {
|
||||
_, err := r.DB.Exec(`DELETE FROM job WHERE job.id = ?`, id)
|
||||
if err != nil {
|
||||
log.Warnf("DeleteJobById(%d): error %v", id, err)
|
||||
log.Errorf("DeleteJobById(%d): error %#v", id, err)
|
||||
} else {
|
||||
log.Infof("DeleteJobById(%d): Success", id)
|
||||
}
|
||||
@ -278,6 +335,7 @@ func (r *JobRepository) DeleteJobById(id int64) error {
|
||||
|
||||
// TODO: Use node hours instead: SELECT job.user, sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN CAST(strftime('%s', 'now') AS INTEGER) - job.start_time ELSE job.duration END)) as x FROM job GROUP BY user ORDER BY x DESC;
|
||||
func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggregate, filters []*model.JobFilter, weight *model.Weights, limit *int) (map[string]int, error) {
|
||||
start := time.Now()
|
||||
if !aggreg.IsValid() {
|
||||
return nil, errors.New("invalid aggregate")
|
||||
}
|
||||
@ -292,10 +350,12 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
|
||||
now := time.Now().Unix()
|
||||
count = fmt.Sprintf(`sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) as count`, now)
|
||||
runner = r.DB
|
||||
default:
|
||||
log.Infof("CountGroupedJobs() Weight %v unknown.", *weight)
|
||||
}
|
||||
}
|
||||
|
||||
q, qerr := SecurityCheck(ctx, sq.Select("job."+string(aggreg), count).From("job").GroupBy("job." + string(aggreg)).OrderBy("count DESC"))
|
||||
q, qerr := SecurityCheck(ctx, sq.Select("job."+string(aggreg), count).From("job").GroupBy("job."+string(aggreg)).OrderBy("count DESC"))
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
@ -311,6 +371,7 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
|
||||
counts := map[string]int{}
|
||||
rows, err := q.RunWith(runner).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -318,12 +379,14 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
|
||||
var group string
|
||||
var count int
|
||||
if err := rows.Scan(&group, &count); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
counts[group] = count
|
||||
}
|
||||
|
||||
log.Infof("Timer CountGroupedJobs %s", time.Since(start))
|
||||
return counts, nil
|
||||
}
|
||||
|
||||
@ -360,20 +423,23 @@ func (r *JobRepository) MarkArchived(
|
||||
stmt = stmt.Set("net_bw_avg", stats.Avg)
|
||||
case "file_bw":
|
||||
stmt = stmt.Set("file_bw_avg", stats.Avg)
|
||||
default:
|
||||
log.Infof("MarkArchived() Metric '%v' unknown", metric)
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
log.Warn("Error while marking job as archived")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Archiving worker thread
|
||||
func (r *JobRepository) archivingWorker(){
|
||||
func (r *JobRepository) archivingWorker() {
|
||||
for {
|
||||
select {
|
||||
case job, ok := <- r.archiveChannel:
|
||||
case job, ok := <-r.archiveChannel:
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
@ -407,54 +473,216 @@ func (r *JobRepository) archivingWorker(){
|
||||
}
|
||||
|
||||
// Trigger async archiving
|
||||
func (r *JobRepository) TriggerArchiving(job *schema.Job){
|
||||
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
|
||||
r.archivePending.Add(1)
|
||||
r.archiveChannel <- job
|
||||
}
|
||||
|
||||
// Wait for background thread to finish pending archiving operations
|
||||
func (r *JobRepository) WaitForArchiving(){
|
||||
func (r *JobRepository) WaitForArchiving() {
|
||||
// close channel and wait for worker to process remaining jobs
|
||||
r.archivePending.Wait()
|
||||
}
|
||||
|
||||
var ErrNotFound = errors.New("no such job or user")
|
||||
var ErrNotFound = errors.New("no such jobname, project or user")
|
||||
var ErrForbidden = errors.New("not authorized")
|
||||
|
||||
// FindJobOrUser returns a job database ID or a username if a job or user machtes the search term.
|
||||
// As 0 is a valid job id, check if username is "" instead in order to check what machted.
|
||||
// FindJobnameOrUserOrProject returns a jobName or a username or a projectId if a jobName or user or project matches the search term.
|
||||
// If query is found to be an integer (= conversion to INT datatype succeeds), skip back to parent call
|
||||
// If nothing matches the search, `ErrNotFound` is returned.
|
||||
func (r *JobRepository) FindJobOrUser(ctx context.Context, searchterm string) (job int64, username string, err error) {
|
||||
|
||||
func (r *JobRepository) FindJobnameOrUserOrProject(ctx context.Context, searchterm string) (metasnip string, username string, project string, err error) {
|
||||
user := auth.GetUser(ctx)
|
||||
if id, err := strconv.Atoi(searchterm); err == nil {
|
||||
qb := sq.Select("job.id").From("job").Where("job.job_id = ?", id)
|
||||
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
|
||||
return "", "", "", nil
|
||||
} else { // has to have letters
|
||||
|
||||
if user != nil && user.HasNotRoles([]string{auth.RoleAdmin, auth.RoleSupport}) {
|
||||
qb = qb.Where("job.user = ?", user.Username)
|
||||
err := sq.Select("job.user").Distinct().From("job").
|
||||
Where("job.user = ?", searchterm).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&username)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return "", "", "", err
|
||||
} else if err == nil {
|
||||
return "", username, "", nil
|
||||
}
|
||||
|
||||
if username == "" { // Try with Name2Username query
|
||||
errtwo := sq.Select("user.username").Distinct().From("user").
|
||||
Where("user.name LIKE ?", fmt.Sprint("%"+searchterm+"%")).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&username)
|
||||
if errtwo != nil && errtwo != sql.ErrNoRows {
|
||||
return "", "", "", errtwo
|
||||
} else if errtwo == nil {
|
||||
return "", username, "", nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err := qb.RunWith(r.stmtCache).QueryRow().Scan(&job)
|
||||
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
|
||||
err := sq.Select("job.project").Distinct().From("job").
|
||||
Where("job.project = ?", searchterm).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&project)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return "", "", "", err
|
||||
} else if err == nil {
|
||||
return "", "", project, nil
|
||||
}
|
||||
}
|
||||
|
||||
// All Authorizations: If unlabeled query not username or projectId, try for jobname: Match Metadata, on hit, parent method redirects to jobName GQL query
|
||||
err := sq.Select("job.cluster").Distinct().From("job").
|
||||
Where("job.meta_data LIKE ?", "%"+searchterm+"%").
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&metasnip)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return 0, "", err
|
||||
return "", "", "", err
|
||||
} else if err == nil {
|
||||
return job, "", nil
|
||||
return metasnip[0:1], "", "", nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", "", "", ErrNotFound
|
||||
}
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindUser(ctx context.Context, searchterm string) (username string, err error) {
|
||||
user := auth.GetUser(ctx)
|
||||
if user == nil || user.HasAnyRole([]string{auth.RoleAdmin, auth.RoleSupport}) {
|
||||
err := sq.Select("job.user").Distinct().From("job").
|
||||
Where("job.user = ?", searchterm).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&username)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return 0, "", err
|
||||
return "", err
|
||||
} else if err == nil {
|
||||
return 0, username, nil
|
||||
return username, nil
|
||||
}
|
||||
}
|
||||
return "", ErrNotFound
|
||||
|
||||
return 0, "", ErrNotFound
|
||||
} else {
|
||||
log.Infof("Non-Admin User %s : Requested Query Username -> %s: Forbidden", user.Name, searchterm)
|
||||
return "", ErrForbidden
|
||||
}
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindUserByName(ctx context.Context, searchterm string) (username string, err error) {
|
||||
user := auth.GetUser(ctx)
|
||||
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
|
||||
err := sq.Select("user.username").Distinct().From("user").
|
||||
Where("user.name = ?", searchterm).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&username)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return "", err
|
||||
} else if err == nil {
|
||||
return username, nil
|
||||
}
|
||||
return "", ErrNotFound
|
||||
|
||||
} else {
|
||||
log.Infof("Non-Admin User %s : Requested Query Name -> %s: Forbidden", user.Name, searchterm)
|
||||
return "", ErrForbidden
|
||||
}
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindUsers(ctx context.Context, searchterm string) (usernames []string, err error) {
|
||||
user := auth.GetUser(ctx)
|
||||
emptyResult := make([]string, 0)
|
||||
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
|
||||
rows, err := sq.Select("job.user").Distinct().From("job").
|
||||
Where("job.user LIKE ?", fmt.Sprint("%", searchterm, "%")).
|
||||
RunWith(r.stmtCache).Query()
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return emptyResult, err
|
||||
} else if err == nil {
|
||||
for rows.Next() {
|
||||
var name string
|
||||
err := rows.Scan(&name)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
log.Warnf("Error while scanning rows: %v", err)
|
||||
return emptyResult, err
|
||||
}
|
||||
usernames = append(usernames, name)
|
||||
}
|
||||
return usernames, nil
|
||||
}
|
||||
return emptyResult, ErrNotFound
|
||||
|
||||
} else {
|
||||
log.Infof("Non-Admin User %s : Requested Query Usernames -> %s: Forbidden", user.Name, searchterm)
|
||||
return emptyResult, ErrForbidden
|
||||
}
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindUsersByName(ctx context.Context, searchterm string) (usernames []string, err error) {
|
||||
user := auth.GetUser(ctx)
|
||||
emptyResult := make([]string, 0)
|
||||
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
|
||||
rows, err := sq.Select("user.username").Distinct().From("user").
|
||||
Where("user.name LIKE ?", fmt.Sprint("%", searchterm, "%")).
|
||||
RunWith(r.stmtCache).Query()
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return emptyResult, err
|
||||
} else if err == nil {
|
||||
for rows.Next() {
|
||||
var username string
|
||||
err := rows.Scan(&username)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
log.Warnf("Error while scanning rows: %v", err)
|
||||
return emptyResult, err
|
||||
}
|
||||
usernames = append(usernames, username)
|
||||
}
|
||||
return usernames, nil
|
||||
}
|
||||
return emptyResult, ErrNotFound
|
||||
|
||||
} else {
|
||||
log.Infof("Non-Admin User %s : Requested Query name -> %s: Forbidden", user.Name, searchterm)
|
||||
return emptyResult, ErrForbidden
|
||||
}
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindNameByUser(ctx context.Context, searchterm string) (name string, err error) {
|
||||
user := auth.GetUser(ctx)
|
||||
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
|
||||
err := sq.Select("user.name").Distinct().From("user").
|
||||
Where("user.username = ?", searchterm).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&name)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return "", err
|
||||
} else if err == nil {
|
||||
return name, nil
|
||||
}
|
||||
return "", ErrNotFound
|
||||
|
||||
} else {
|
||||
log.Infof("Non-Admin User %s : Requested Query Name -> %s: Forbidden", user.Name, searchterm)
|
||||
return "", ErrForbidden
|
||||
}
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindProject(ctx context.Context, searchterm string) (project string, err error) {
|
||||
|
||||
user := auth.GetUser(ctx)
|
||||
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
|
||||
err := sq.Select("job.project").Distinct().From("job").
|
||||
Where("job.project = ?", searchterm).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&project)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return "", err
|
||||
} else if err == nil {
|
||||
return project, nil
|
||||
}
|
||||
return "", ErrNotFound
|
||||
} else {
|
||||
log.Infof("Non-Admin User %s : Requested Query Project -> %s: Forbidden", user.Name, project)
|
||||
return "", ErrForbidden
|
||||
}
|
||||
}
|
||||
|
||||
func (r *JobRepository) Partitions(cluster string) ([]string, error) {
|
||||
var err error
|
||||
start := time.Now()
|
||||
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
|
||||
parts := []string{}
|
||||
if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
|
||||
@ -466,18 +694,22 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
log.Infof("Timer Partitions %s", time.Since(start))
|
||||
return partitions.([]string), nil
|
||||
}
|
||||
|
||||
// AllocatedNodes returns a map of all subclusters to a map of hostnames to the amount of jobs running on that host.
|
||||
// Hosts with zero jobs running on them will not show up!
|
||||
func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) {
|
||||
|
||||
start := time.Now()
|
||||
subclusters := make(map[string]map[string]int)
|
||||
rows, err := sq.Select("resources", "subcluster").From("job").
|
||||
Where("job.job_state = 'running'").
|
||||
Where("job.cluster = ?", cluster).
|
||||
RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -488,9 +720,11 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
|
||||
var resources []*schema.Resource
|
||||
var subcluster string
|
||||
if err := rows.Scan(&raw, &subcluster); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
if err := json.Unmarshal(raw, &resources); err != nil {
|
||||
log.Warn("Error while unmarshaling raw resources json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -505,10 +739,13 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Timer AllocatedNodes %s", time.Since(start))
|
||||
return subclusters, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
||||
|
||||
start := time.Now()
|
||||
res, err := sq.Update("job").
|
||||
Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
|
||||
Set("duration", 0).
|
||||
@ -518,16 +755,243 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
||||
Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)).
|
||||
RunWith(r.DB).Exec()
|
||||
if err != nil {
|
||||
log.Warn("Error while stopping jobs exceeding walltime")
|
||||
return err
|
||||
}
|
||||
|
||||
rowsAffected, err := res.RowsAffected()
|
||||
if err != nil {
|
||||
log.Warn("Error while fetching affected rows after stopping due to exceeded walltime")
|
||||
return err
|
||||
}
|
||||
|
||||
if rowsAffected > 0 {
|
||||
log.Warnf("%d jobs have been marked as failed due to running too long", rowsAffected)
|
||||
log.Infof("%d jobs have been marked as failed due to running too long", rowsAffected)
|
||||
}
|
||||
log.Infof("Timer StopJobsExceedingWalltimeBy %s", time.Since(start))
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: Move to config
|
||||
const ShortJobDuration int = 5 * 60
|
||||
|
||||
// GraphQL validation should make sure that no unkown values can be specified.
|
||||
var groupBy2column = map[model.Aggregate]string{
|
||||
model.AggregateUser: "job.user",
|
||||
model.AggregateProject: "job.project",
|
||||
model.AggregateCluster: "job.cluster",
|
||||
}
|
||||
|
||||
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||
func (r *JobRepository) JobsStatistics(ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
||||
|
||||
start := time.Now()
|
||||
// In case `groupBy` is nil (not used), the model.JobsStatistics used is at the key '' (empty string)
|
||||
stats := map[string]*model.JobsStatistics{}
|
||||
var castType string
|
||||
|
||||
if r.driver == "sqlite3" {
|
||||
castType = "int"
|
||||
} else if r.driver == "mysql" {
|
||||
castType = "unsigned"
|
||||
}
|
||||
|
||||
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
|
||||
for _, cluster := range archive.Clusters {
|
||||
for _, subcluster := range cluster.SubClusters {
|
||||
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as %s)", subcluster.SocketsPerNode, subcluster.CoresPerSocket, castType)
|
||||
var rawQuery sq.SelectBuilder
|
||||
if groupBy == nil {
|
||||
rawQuery = sq.Select(
|
||||
"''",
|
||||
"COUNT(job.id)",
|
||||
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
|
||||
corehoursCol,
|
||||
).From("job")
|
||||
} else {
|
||||
col := groupBy2column[*groupBy]
|
||||
rawQuery = sq.Select(
|
||||
col,
|
||||
"COUNT(job.id)",
|
||||
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
|
||||
corehoursCol,
|
||||
).From("job").GroupBy(col)
|
||||
}
|
||||
|
||||
rawQuery = rawQuery.
|
||||
Where("job.cluster = ?", cluster.Name).
|
||||
Where("job.subcluster = ?", subcluster.Name)
|
||||
|
||||
query, qerr := SecurityCheck(ctx, rawQuery)
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filter {
|
||||
query = BuildWhereClause(f, query)
|
||||
}
|
||||
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
log.Warn("Error while querying DB for job statistics")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id sql.NullString
|
||||
var jobs, walltime, corehours sql.NullInt64
|
||||
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
if s, ok := stats[id.String]; ok {
|
||||
s.TotalJobs += int(jobs.Int64)
|
||||
s.TotalWalltime += int(walltime.Int64)
|
||||
s.TotalCoreHours += int(corehours.Int64)
|
||||
} else {
|
||||
stats[id.String] = &model.JobsStatistics{
|
||||
ID: id.String,
|
||||
TotalJobs: int(jobs.Int64),
|
||||
TotalWalltime: int(walltime.Int64),
|
||||
TotalCoreHours: int(corehours.Int64),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if groupBy == nil {
|
||||
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
|
||||
query, qerr := SecurityCheck(ctx, query)
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filter {
|
||||
query = BuildWhereClause(f, query)
|
||||
}
|
||||
if err := query.RunWith(r.DB).QueryRow().Scan(&(stats[""].ShortJobs)); err != nil {
|
||||
log.Warn("Error while scanning rows for short job stats")
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
col := groupBy2column[*groupBy]
|
||||
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
|
||||
|
||||
query, qerr := SecurityCheck(ctx, query)
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filter {
|
||||
query = BuildWhereClause(f, query)
|
||||
}
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
log.Warn("Error while querying jobs for short jobs")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id sql.NullString
|
||||
var shortJobs sql.NullInt64
|
||||
if err := rows.Scan(&id, &shortJobs); err != nil {
|
||||
log.Warn("Error while scanning rows for short jobs")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
stats[id.String].ShortJobs = int(shortJobs.Int64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculating the histogram data is expensive, so only do it if needed.
|
||||
// An explicit resolver can not be used because we need to know the filters.
|
||||
histogramsNeeded := false
|
||||
fields := graphql.CollectFieldsCtx(ctx, nil)
|
||||
for _, col := range fields {
|
||||
if col.Name == "histDuration" || col.Name == "histNumNodes" {
|
||||
histogramsNeeded = true
|
||||
}
|
||||
}
|
||||
|
||||
res := make([]*model.JobsStatistics, 0, len(stats))
|
||||
for _, stat := range stats {
|
||||
res = append(res, stat)
|
||||
id, col := "", ""
|
||||
if groupBy != nil {
|
||||
id = stat.ID
|
||||
col = groupBy2column[*groupBy]
|
||||
}
|
||||
|
||||
if histogramsNeeded {
|
||||
var err error
|
||||
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
||||
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter, id, col)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job statistics histogram: running jobs")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job statistics histogram: num nodes")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Timer JobStatistics %s", time.Since(start))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
|
||||
// to add a condition to the query of the kind "<col> = <id>".
|
||||
func (r *JobRepository) jobsStatisticsHistogram(ctx context.Context,
|
||||
value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
|
||||
|
||||
start := time.Now()
|
||||
query := sq.Select(value, "COUNT(job.id) AS count").From("job")
|
||||
query, qerr := SecurityCheck(ctx, sq.Select(value, "COUNT(job.id) AS count").From("job"))
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filters {
|
||||
query = BuildWhereClause(f, query)
|
||||
}
|
||||
|
||||
if len(id) != 0 && len(col) != 0 {
|
||||
query = query.Where(col+" = ?", id)
|
||||
}
|
||||
|
||||
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
points := make([]*model.HistoPoint, 0)
|
||||
for rows.Next() {
|
||||
point := model.HistoPoint{}
|
||||
if err := rows.Scan(&point.Value, &point.Count); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
points = append(points, &point)
|
||||
}
|
||||
log.Infof("Timer jobsStatisticsHistogram %s", time.Since(start))
|
||||
return points, nil
|
||||
}
|
||||
|
@ -8,10 +8,12 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func init() {
|
||||
log.Init("info", true)
|
||||
Connect("sqlite3", "../../test/test.db")
|
||||
}
|
||||
|
||||
|
109
internal/repository/migration.go
Normal file
109
internal/repository/migration.go
Normal file
@ -0,0 +1,109 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"embed"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/golang-migrate/migrate/v4"
|
||||
"github.com/golang-migrate/migrate/v4/database/mysql"
|
||||
"github.com/golang-migrate/migrate/v4/database/sqlite3"
|
||||
"github.com/golang-migrate/migrate/v4/source/iofs"
|
||||
)
|
||||
|
||||
const supportedVersion uint = 2
|
||||
|
||||
//go:embed migrations/*
|
||||
var migrationFiles embed.FS
|
||||
|
||||
func checkDBVersion(backend string, db *sql.DB) {
|
||||
var m *migrate.Migrate
|
||||
|
||||
if backend == "sqlite3" {
|
||||
|
||||
driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
m, err = migrate.NewWithInstance("iofs", d, "sqlite3", driver)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
} else if backend == "mysql" {
|
||||
driver, err := mysql.WithInstance(db, &mysql.Config{})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
d, err := iofs.New(migrationFiles, "migrations/mysql")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
m, err = migrate.NewWithInstance("iofs", d, "mysql", driver)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
v, _, err := m.Version()
|
||||
if err != nil {
|
||||
if err == migrate.ErrNilVersion {
|
||||
log.Warn("Legacy database without version or missing database file!")
|
||||
} else {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if v < supportedVersion {
|
||||
log.Warnf("Unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend --migrate-db", v, supportedVersion)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if v > supportedVersion {
|
||||
log.Warnf("Unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool!", v, supportedVersion)
|
||||
os.Exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
func MigrateDB(backend string, db string) {
|
||||
var m *migrate.Migrate
|
||||
|
||||
if backend == "sqlite3" {
|
||||
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
} else if backend == "mysql" {
|
||||
d, err := iofs.New(migrationFiles, "migrations/mysql")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := m.Up(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
m.Close()
|
||||
}
|
@ -0,0 +1,5 @@
|
||||
DROP TABLE IF EXISTS job;
|
||||
DROP TABLE IF EXISTS tags;
|
||||
DROP TABLE IF EXISTS jobtag;
|
||||
DROP TABLE IF EXISTS configuration;
|
||||
DROP TABLE IF EXISTS user;
|
62
internal/repository/migrations/mysql/01_init-schema.up.sql
Normal file
62
internal/repository/migrations/mysql/01_init-schema.up.sql
Normal file
@ -0,0 +1,62 @@
|
||||
CREATE TABLE IF NOT EXISTS job (
|
||||
id INTEGER AUTO_INCREMENT PRIMARY KEY ,
|
||||
job_id BIGINT NOT NULL,
|
||||
cluster VARCHAR(255) NOT NULL,
|
||||
subcluster VARCHAR(255) NOT NULL,
|
||||
start_time BIGINT NOT NULL, -- Unix timestamp
|
||||
|
||||
user VARCHAR(255) NOT NULL,
|
||||
project VARCHAR(255) NOT NULL,
|
||||
`partition` VARCHAR(255) NOT NULL,
|
||||
array_job_id BIGINT NOT NULL,
|
||||
duration INT NOT NULL DEFAULT 0,
|
||||
walltime INT NOT NULL DEFAULT 0,
|
||||
job_state VARCHAR(255) NOT NULL
|
||||
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
|
||||
'stopped', 'timeout', 'preempted', 'out_of_memory')),
|
||||
meta_data TEXT, -- JSON
|
||||
resources TEXT NOT NULL, -- JSON
|
||||
|
||||
num_nodes INT NOT NULL,
|
||||
num_hwthreads INT NOT NULL,
|
||||
num_acc INT NOT NULL,
|
||||
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
|
||||
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
|
||||
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
|
||||
|
||||
mem_used_max REAL NOT NULL DEFAULT 0.0,
|
||||
flops_any_avg REAL NOT NULL DEFAULT 0.0,
|
||||
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
load_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
|
||||
file_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS tag (
|
||||
id INTEGER PRIMARY KEY,
|
||||
tag_type VARCHAR(255) NOT NULL,
|
||||
tag_name VARCHAR(255) NOT NULL,
|
||||
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
|
||||
|
||||
CREATE TABLE IF NOT EXISTS jobtag (
|
||||
job_id INTEGER,
|
||||
tag_id INTEGER,
|
||||
PRIMARY KEY (job_id, tag_id),
|
||||
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS configuration (
|
||||
username varchar(255),
|
||||
confkey varchar(255),
|
||||
value varchar(255),
|
||||
PRIMARY KEY (username, confkey),
|
||||
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS user (
|
||||
username varchar(255) PRIMARY KEY NOT NULL,
|
||||
password varchar(255) DEFAULT NULL,
|
||||
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
|
||||
name varchar(255) DEFAULT NULL,
|
||||
roles varchar(255) NOT NULL DEFAULT "[]",
|
||||
email varchar(255) DEFAULT NULL);
|
@ -0,0 +1,5 @@
|
||||
DROP INDEX IF EXISTS job_stats;
|
||||
DROP INDEX IF EXISTS job_by_user;
|
||||
DROP INDEX IF EXISTS job_by_starttime;
|
||||
DROP INDEX IF EXISTS job_by_job_id;
|
||||
DROP INDEX IF EXISTS job_by_state;
|
5
internal/repository/migrations/mysql/02_add-index.up.sql
Normal file
5
internal/repository/migrations/mysql/02_add-index.up.sql
Normal file
@ -0,0 +1,5 @@
|
||||
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
|
||||
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
|
||||
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
|
||||
CREATE INDEX IF NOT EXISTS job_by_state ON job (job_state);
|
@ -0,0 +1,5 @@
|
||||
DROP TABLE IF EXISTS job;
|
||||
DROP TABLE IF EXISTS tags;
|
||||
DROP TABLE IF EXISTS jobtag;
|
||||
DROP TABLE IF EXISTS configuration;
|
||||
DROP TABLE IF EXISTS user;
|
62
internal/repository/migrations/sqlite3/01_init-schema.up.sql
Normal file
62
internal/repository/migrations/sqlite3/01_init-schema.up.sql
Normal file
@ -0,0 +1,62 @@
|
||||
CREATE TABLE IF NOT EXISTS job (
|
||||
id INTEGER PRIMARY KEY,
|
||||
job_id BIGINT NOT NULL,
|
||||
cluster VARCHAR(255) NOT NULL,
|
||||
subcluster VARCHAR(255) NOT NULL,
|
||||
start_time BIGINT NOT NULL, -- Unix timestamp
|
||||
|
||||
user VARCHAR(255) NOT NULL,
|
||||
project VARCHAR(255) NOT NULL,
|
||||
partition VARCHAR(255) NOT NULL,
|
||||
array_job_id BIGINT NOT NULL,
|
||||
duration INT NOT NULL DEFAULT 0,
|
||||
walltime INT NOT NULL DEFAULT 0,
|
||||
job_state VARCHAR(255) NOT NULL
|
||||
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
|
||||
'stopped', 'timeout', 'preempted', 'out_of_memory')),
|
||||
meta_data TEXT, -- JSON
|
||||
resources TEXT NOT NULL, -- JSON
|
||||
|
||||
num_nodes INT NOT NULL,
|
||||
num_hwthreads INT NOT NULL,
|
||||
num_acc INT NOT NULL,
|
||||
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
|
||||
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
|
||||
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
|
||||
|
||||
mem_used_max REAL NOT NULL DEFAULT 0.0,
|
||||
flops_any_avg REAL NOT NULL DEFAULT 0.0,
|
||||
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
load_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
|
||||
file_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS tag (
|
||||
id INTEGER PRIMARY KEY,
|
||||
tag_type VARCHAR(255) NOT NULL,
|
||||
tag_name VARCHAR(255) NOT NULL,
|
||||
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
|
||||
|
||||
CREATE TABLE IF NOT EXISTS jobtag (
|
||||
job_id INTEGER,
|
||||
tag_id INTEGER,
|
||||
PRIMARY KEY (job_id, tag_id),
|
||||
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS configuration (
|
||||
username varchar(255),
|
||||
confkey varchar(255),
|
||||
value varchar(255),
|
||||
PRIMARY KEY (username, confkey),
|
||||
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS user (
|
||||
username varchar(255) PRIMARY KEY NOT NULL,
|
||||
password varchar(255) DEFAULT NULL,
|
||||
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
|
||||
name varchar(255) DEFAULT NULL,
|
||||
roles varchar(255) NOT NULL DEFAULT "[]",
|
||||
email varchar(255) DEFAULT NULL);
|
@ -0,0 +1,5 @@
|
||||
DROP INDEX IF EXISTS job_stats;
|
||||
DROP INDEX IF EXISTS job_by_user;
|
||||
DROP INDEX IF EXISTS job_by_starttime;
|
||||
DROP INDEX IF EXISTS job_by_job_id;
|
||||
DROP INDEX IF EXISTS job_by_state;
|
@ -0,0 +1,5 @@
|
||||
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
|
||||
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
|
||||
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
|
||||
CREATE INDEX IF NOT EXISTS job_by_state ON job (job_state);
|
@ -39,7 +39,7 @@ func (r *JobRepository) QueryJobs(
|
||||
} else if order.Order == model.SortDirectionEnumDesc {
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
|
||||
} else {
|
||||
return nil, errors.New("invalid sorting order")
|
||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
|
||||
}
|
||||
}
|
||||
|
||||
@ -54,12 +54,14 @@ func (r *JobRepository) QueryJobs(
|
||||
|
||||
sql, args, err := query.ToSql()
|
||||
if err != nil {
|
||||
log.Warn("Error while converting query to sql")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Debugf("SQL query: `%s`, args: %#v", sql, args)
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -68,6 +70,7 @@ func (r *JobRepository) QueryJobs(
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
@ -135,6 +138,9 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
||||
if filter.Project != nil {
|
||||
query = buildStringCondition("job.project", filter.Project, query)
|
||||
}
|
||||
if filter.JobName != nil {
|
||||
query = buildStringCondition("job.meta_data", filter.JobName, query)
|
||||
}
|
||||
if filter.Cluster != nil {
|
||||
query = buildStringCondition("job.cluster", filter.Cluster, query)
|
||||
}
|
||||
@ -217,6 +223,13 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
|
||||
if cond.Contains != nil {
|
||||
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
|
||||
}
|
||||
if cond.In != nil {
|
||||
queryUsers := make([]string, len(cond.In))
|
||||
for i, val := range cond.In {
|
||||
queryUsers[i] = val
|
||||
}
|
||||
return query.Where(sq.Or{sq.Eq{"job.user": queryUsers}})
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
||||
@ -226,7 +239,7 @@ var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||
func toSnakeCase(str string) string {
|
||||
for _, c := range str {
|
||||
if c == '\'' || c == '\\' {
|
||||
panic("A hacker (probably not)!!!")
|
||||
log.Panic("toSnakeCase() attack vector!")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9,22 +9,26 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// Add the tag with id `tagId` to the job with the database id `jobId`.
|
||||
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
if _, err := r.stmtCache.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES ($1, $2)`, job, tag); err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j, err := r.FindById(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(&job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -34,16 +38,19 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
// Removes a tag from a job
|
||||
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
|
||||
if _, err := r.stmtCache.Exec("DELETE FROM jobtag WHERE jobtag.job_id = $1 AND jobtag.tag_id = $2", job, tag); err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j, err := r.FindById(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(&job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -144,6 +151,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -151,6 +159,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
tags = append(tags, tag)
|
||||
|
@ -6,13 +6,13 @@ package repository
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
@ -33,21 +33,9 @@ func GetUserCfgRepo() *UserCfgRepo {
|
||||
userCfgRepoOnce.Do(func() {
|
||||
db := GetConnection()
|
||||
|
||||
_, err := db.DB.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS configuration (
|
||||
username varchar(255),
|
||||
confkey varchar(255),
|
||||
value varchar(255),
|
||||
PRIMARY KEY (username, confkey),
|
||||
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
lookupConfigStmt, err := db.DB.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalf("db.DB.Preparex() error: %v", err)
|
||||
}
|
||||
|
||||
userCfgRepoInstance = &UserCfgRepo{
|
||||
@ -82,6 +70,7 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
|
||||
|
||||
rows, err := uCfg.Lookup.Query(user.Username)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking up user config for user '%v'", user.Username)
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
@ -90,11 +79,13 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
|
||||
for rows.Next() {
|
||||
var key, rawval string
|
||||
if err := rows.Scan(&key, &rawval); err != nil {
|
||||
log.Warn("Error while scanning user config values")
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
|
||||
log.Warn("Error while unmarshaling raw user config json")
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
@ -106,6 +97,7 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
|
||||
return config, 24 * time.Hour, size
|
||||
})
|
||||
if err, ok := data.(error); ok {
|
||||
log.Error("Error in returned dataset")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -122,6 +114,7 @@ func (uCfg *UserCfgRepo) UpdateConfig(
|
||||
if user == nil {
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(value), &val); err != nil {
|
||||
log.Warn("Error while unmarshaling raw user config json")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -131,8 +124,8 @@ func (uCfg *UserCfgRepo) UpdateConfig(
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
|
||||
user.Username, key, value); err != nil {
|
||||
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`, user, key, value); err != nil {
|
||||
log.Warnf("Error while replacing user config in DB for user '%v'", user)
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -12,8 +12,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
@ -44,7 +44,7 @@ var routes []Route = []Route{
|
||||
{"/monitoring/user/{id}", "monitoring/user.tmpl", "User <ID> - ClusterCockpit", true, setupUserRoute},
|
||||
{"/monitoring/systems/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> - ClusterCockpit", false, setupClusterRoute},
|
||||
{"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute},
|
||||
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analaysis - ClusterCockpit", true, setupAnalysisRoute},
|
||||
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analysis - ClusterCockpit", true, setupAnalysisRoute},
|
||||
{"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterRoute},
|
||||
}
|
||||
|
||||
@ -61,21 +61,21 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
|
||||
State: []schema.JobState{schema.JobStateRunning},
|
||||
}}, nil, nil)
|
||||
if err != nil {
|
||||
log.Errorf("failed to count jobs: %s", err.Error())
|
||||
log.Warnf("failed to count jobs: %s", err.Error())
|
||||
runningJobs = map[string]int{}
|
||||
}
|
||||
totalJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, nil, nil, nil)
|
||||
if err != nil {
|
||||
log.Errorf("failed to count jobs: %s", err.Error())
|
||||
log.Warnf("failed to count jobs: %s", err.Error())
|
||||
totalJobs = map[string]int{}
|
||||
}
|
||||
from := time.Now().Add(-24 * time.Hour)
|
||||
recentShortJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{
|
||||
StartTime: &schema.TimeRange{From: &from, To: nil},
|
||||
Duration: &schema.IntRange{From: 0, To: graph.ShortJobDuration},
|
||||
Duration: &schema.IntRange{From: 0, To: repository.ShortJobDuration},
|
||||
}}, nil, nil)
|
||||
if err != nil {
|
||||
log.Errorf("failed to count jobs: %s", err.Error())
|
||||
log.Warnf("failed to count jobs: %s", err.Error())
|
||||
recentShortJobs = map[string]int{}
|
||||
}
|
||||
|
||||
@ -158,7 +158,7 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
|
||||
tags, counts, err := jobRepo.CountTags(username, projects)
|
||||
tagMap := make(map[string][]map[string]interface{})
|
||||
if err != nil {
|
||||
log.Errorf("GetTags failed: %s", err.Error())
|
||||
log.Warnf("GetTags failed: %s", err.Error())
|
||||
i["tagmap"] = tagMap
|
||||
return i
|
||||
}
|
||||
@ -188,9 +188,17 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||
filterPresets["project"] = query.Get("project")
|
||||
filterPresets["projectMatch"] = "eq"
|
||||
}
|
||||
if query.Get("user") != "" {
|
||||
filterPresets["user"] = query.Get("user")
|
||||
filterPresets["userMatch"] = "eq"
|
||||
if query.Get("jobName") != "" {
|
||||
filterPresets["jobName"] = query.Get("jobName")
|
||||
}
|
||||
if len(query["user"]) != 0 {
|
||||
if len(query["user"]) == 1 {
|
||||
filterPresets["user"] = query.Get("user")
|
||||
filterPresets["userMatch"] = "contains"
|
||||
} else {
|
||||
filterPresets["user"] = query["user"]
|
||||
filterPresets["userMatch"] = "in"
|
||||
}
|
||||
}
|
||||
if len(query["state"]) != 0 {
|
||||
filterPresets["state"] = query["state"]
|
||||
@ -301,3 +309,81 @@ func SetupRoutes(router *mux.Router, version string, hash string, buildTime stri
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func HandleSearchBar(rw http.ResponseWriter, r *http.Request, api *api.RestApi) {
|
||||
if search := r.URL.Query().Get("searchId"); search != "" {
|
||||
splitSearch := strings.Split(search, ":")
|
||||
|
||||
if len(splitSearch) == 2 {
|
||||
switch strings.Trim(splitSearch[0], " ") {
|
||||
case "jobId":
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // All Users: Redirect to Tablequery
|
||||
return
|
||||
case "jobName":
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // All Users: Redirect to Tablequery
|
||||
return
|
||||
case "projectId":
|
||||
project, _ := api.JobRepository.FindProject(r.Context(), strings.Trim(splitSearch[1], " ")) // Restricted: projectId
|
||||
if project != "" {
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(project), http.StatusTemporaryRedirect)
|
||||
return
|
||||
} else {
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?jobId=NotFound", http.StatusTemporaryRedirect) // Workaround to display correctly empty table
|
||||
}
|
||||
case "username":
|
||||
usernames, _ := api.JobRepository.FindUsers(r.Context(), strings.Trim(splitSearch[1], " ")) // Restricted: usernames
|
||||
if len(usernames) == 1 {
|
||||
http.Redirect(rw, r, "/monitoring/user/"+usernames[0], http.StatusTemporaryRedirect) // One Match: Redirect to User View
|
||||
return
|
||||
} else if len(usernames) > 1 {
|
||||
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // > 1 Matches: Redirect to user table
|
||||
return
|
||||
} else {
|
||||
http.Redirect(rw, r, "/monitoring/users/?user=NotFound", http.StatusTemporaryRedirect) // Workaround to display correctly empty table
|
||||
}
|
||||
case "name":
|
||||
usernames, _ := api.JobRepository.FindUsersByName(r.Context(), strings.Trim(splitSearch[1], " ")) // Restricted: usernames queried by name
|
||||
if len(usernames) == 1 {
|
||||
http.Redirect(rw, r, "/monitoring/user/"+usernames[0], http.StatusTemporaryRedirect)
|
||||
return
|
||||
} else if len(usernames) > 1 {
|
||||
joinedNames := strings.Join(usernames, "&user=")
|
||||
http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames, http.StatusTemporaryRedirect) // > 1 Matches: Redirect to user table
|
||||
return
|
||||
} else {
|
||||
http.Redirect(rw, r, "/monitoring/users/?user=NotFound", http.StatusTemporaryRedirect) // Workaround to display correctly empty table
|
||||
}
|
||||
default:
|
||||
http.Error(rw, "'searchId' type parameter unknown", http.StatusBadRequest)
|
||||
}
|
||||
|
||||
} else if len(splitSearch) == 1 {
|
||||
jobname, username, project, err := api.JobRepository.FindJobnameOrUserOrProject(r.Context(), strings.Trim(search, " ")) // Determine Access within
|
||||
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if username != "" {
|
||||
http.Redirect(rw, r, "/monitoring/user/"+username, http.StatusTemporaryRedirect) // User: Redirect to user page
|
||||
return
|
||||
} else if project != "" {
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // projectId (equal)
|
||||
return
|
||||
} else if jobname != "" {
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // JobName (contains)
|
||||
return
|
||||
} else {
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // No Result: Probably jobId
|
||||
return
|
||||
}
|
||||
|
||||
} else {
|
||||
http.Error(rw, "'searchId' query parameter malformed", http.StatusBadRequest)
|
||||
}
|
||||
|
||||
} else {
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?", http.StatusTemporaryRedirect)
|
||||
}
|
||||
}
|
||||
|
@ -14,6 +14,8 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
)
|
||||
|
||||
// Very simple and limited .env file reader.
|
||||
@ -22,6 +24,7 @@ import (
|
||||
func LoadEnv(file string) error {
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
log.Error("Error while opening file")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -40,14 +43,14 @@ func LoadEnv(file string) error {
|
||||
line = strings.TrimPrefix(line, "export ")
|
||||
parts := strings.SplitN(line, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return fmt.Errorf("unsupported line: %#v", line)
|
||||
return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
|
||||
}
|
||||
|
||||
key := strings.TrimSpace(parts[0])
|
||||
val := strings.TrimSpace(parts[1])
|
||||
if strings.HasPrefix(val, "\"") {
|
||||
if !strings.HasSuffix(val, "\"") {
|
||||
return fmt.Errorf("unsupported line: %#v", line)
|
||||
return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
|
||||
}
|
||||
|
||||
runes := []rune(val[1 : len(val)-1])
|
||||
@ -65,7 +68,7 @@ func LoadEnv(file string) error {
|
||||
case '"':
|
||||
sb.WriteRune('"')
|
||||
default:
|
||||
return fmt.Errorf("unsupprorted escape sequence in quoted string: backslash %#v", runes[i])
|
||||
return fmt.Errorf("RUNTIME/SETUP > unsupported escape sequence in quoted string: backslash %#v", runes[i])
|
||||
}
|
||||
continue
|
||||
}
|
||||
@ -89,11 +92,13 @@ func DropPrivileges(username string, group string) error {
|
||||
if group != "" {
|
||||
g, err := user.LookupGroup(group)
|
||||
if err != nil {
|
||||
log.Warn("Error while looking up group")
|
||||
return err
|
||||
}
|
||||
|
||||
gid, _ := strconv.Atoi(g.Gid)
|
||||
if err := syscall.Setgid(gid); err != nil {
|
||||
log.Warn("Error while setting gid")
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -101,11 +106,13 @@ func DropPrivileges(username string, group string) error {
|
||||
if username != "" {
|
||||
u, err := user.Lookup(username)
|
||||
if err != nil {
|
||||
log.Warn("Error while looking up user")
|
||||
return err
|
||||
}
|
||||
|
||||
uid, _ := strconv.Atoi(u.Uid)
|
||||
if err := syscall.Setuid(uid); err != nil {
|
||||
log.Warn("Error while setting uid")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -10,6 +10,7 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
)
|
||||
|
||||
type ArchiveBackend interface {
|
||||
@ -40,6 +41,7 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||
Kind string `json:"kind"`
|
||||
}
|
||||
if err := json.Unmarshal(rawConfig, &kind); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -49,10 +51,11 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||
// case "s3":
|
||||
// ar = &S3Archive{}
|
||||
default:
|
||||
return fmt.Errorf("unkown archive backend '%s''", kind.Kind)
|
||||
return fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", kind.Kind)
|
||||
}
|
||||
|
||||
if err := ar.Init(rawConfig); err != nil {
|
||||
log.Error("Error while initializing archiveBackend")
|
||||
return err
|
||||
}
|
||||
return initClusterConfig()
|
||||
@ -70,6 +73,7 @@ func LoadAveragesFromArchive(
|
||||
|
||||
metaFile, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job metadata from archiveBackend")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -88,6 +92,7 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
||||
|
||||
metaFile, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job metadata from archiveBackend")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -104,6 +109,7 @@ func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
||||
|
||||
jobMeta, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job metadata from archiveBackend")
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
)
|
||||
|
||||
var Clusters []*schema.Cluster
|
||||
@ -23,6 +24,7 @@ func initClusterConfig() error {
|
||||
|
||||
cluster, err := ar.LoadClusterCfg(c)
|
||||
if err != nil {
|
||||
log.Warnf("Error while loading cluster config for cluster '%v'", c)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -59,7 +61,7 @@ func initClusterConfig() error {
|
||||
|
||||
nl, err := ParseNodeList(sc.Nodes)
|
||||
if err != nil {
|
||||
return fmt.Errorf("in %s/cluster.json: %w", cluster.Name, err)
|
||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
|
||||
}
|
||||
nodeLists[cluster.Name][sc.Name] = nl
|
||||
}
|
||||
@ -112,7 +114,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
|
||||
|
||||
cluster := GetCluster(job.Cluster)
|
||||
if cluster == nil {
|
||||
return fmt.Errorf("unkown cluster: %#v", job.Cluster)
|
||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
|
||||
}
|
||||
|
||||
if job.SubCluster != "" {
|
||||
@ -121,11 +123,11 @@ func AssignSubCluster(job *schema.BaseJob) error {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("already assigned subcluster %#v unkown (cluster: %#v)", job.SubCluster, job.Cluster)
|
||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > already assigned subcluster %v unkown (cluster: %v)", job.SubCluster, job.Cluster)
|
||||
}
|
||||
|
||||
if len(job.Resources) == 0 {
|
||||
return fmt.Errorf("job without any resources/hosts")
|
||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > job without any resources/hosts")
|
||||
}
|
||||
|
||||
host0 := job.Resources[0].Hostname
|
||||
@ -141,7 +143,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
|
||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", job.Cluster, host0)
|
||||
}
|
||||
|
||||
func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
||||
@ -154,12 +156,12 @@ func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
||||
|
||||
c := GetCluster(cluster)
|
||||
if c == nil {
|
||||
return "", fmt.Errorf("unkown cluster: %#v", cluster)
|
||||
return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", cluster)
|
||||
}
|
||||
|
||||
if c.SubClusters[0].Nodes == "" {
|
||||
return c.SubClusters[0].Name, nil
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname)
|
||||
return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", cluster, hostname)
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ func loadJobMeta(filename string) (*schema.JobMeta, error) {
|
||||
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
log.Errorf("fsBackend loadJobMeta()- %v", err)
|
||||
log.Errorf("loadJobMeta() > open file error: %v", err)
|
||||
return &schema.JobMeta{}, err
|
||||
}
|
||||
defer f.Close()
|
||||
@ -58,19 +58,19 @@ func (fsa *FsArchive) Init(rawConfig json.RawMessage) error {
|
||||
|
||||
var config FsArchiveConfig
|
||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
||||
log.Errorf("fsBackend Init()- %v", err)
|
||||
log.Warnf("Init() > Unmarshal error: %#v", err)
|
||||
return err
|
||||
}
|
||||
if config.Path == "" {
|
||||
err := fmt.Errorf("fsBackend Init()- empty path")
|
||||
log.Errorf("fsBackend Init()- %v", err)
|
||||
err := fmt.Errorf("Init() : empty config.Path")
|
||||
log.Errorf("Init() > config.Path error: %v", err)
|
||||
return err
|
||||
}
|
||||
fsa.path = config.Path
|
||||
|
||||
entries, err := os.ReadDir(fsa.path)
|
||||
if err != nil {
|
||||
log.Errorf("fsBackend Init()- %v", err)
|
||||
log.Errorf("Init() > ReadDir() error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ func (fsa *FsArchive) LoadJobData(job *schema.Job) (schema.JobData, error) {
|
||||
filename := getPath(job, fsa.path, "data.json")
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
log.Errorf("fsBackend LoadJobData()- %v", err)
|
||||
log.Errorf("LoadJobData() > open file error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
@ -104,11 +104,12 @@ func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
|
||||
|
||||
b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
|
||||
if err != nil {
|
||||
log.Errorf("fsBackend LoadClusterCfg()- %v", err)
|
||||
log.Errorf("LoadClusterCfg() > open file error: %v", err)
|
||||
return &schema.Cluster{}, err
|
||||
}
|
||||
if config.Keys.Validate {
|
||||
if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(b)); err != nil {
|
||||
log.Warnf("Validate cluster config: %v\n", err)
|
||||
return &schema.Cluster{}, fmt.Errorf("Validate cluster config: %v\n", err)
|
||||
}
|
||||
}
|
||||
@ -121,13 +122,13 @@ func (fsa *FsArchive) Iter() <-chan *schema.JobMeta {
|
||||
go func() {
|
||||
clustersDir, err := os.ReadDir(fsa.path)
|
||||
if err != nil {
|
||||
log.Fatalf("Reading clusters failed: %s", err.Error())
|
||||
log.Fatalf("Reading clusters failed @ cluster dirs: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, clusterDir := range clustersDir {
|
||||
lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name()))
|
||||
if err != nil {
|
||||
log.Fatalf("Reading jobs failed: %s", err.Error())
|
||||
log.Fatalf("Reading jobs failed @ lvl1 dirs: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, lvl1Dir := range lvl1Dirs {
|
||||
@ -138,21 +139,21 @@ func (fsa *FsArchive) Iter() <-chan *schema.JobMeta {
|
||||
|
||||
lvl2Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name()))
|
||||
if err != nil {
|
||||
log.Fatalf("Reading jobs failed: %s", err.Error())
|
||||
log.Fatalf("Reading jobs failed @ lvl2 dirs: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, lvl2Dir := range lvl2Dirs {
|
||||
dirpath := filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
|
||||
startTimeDirs, err := os.ReadDir(dirpath)
|
||||
if err != nil {
|
||||
log.Fatalf("Reading jobs failed: %s", err.Error())
|
||||
log.Fatalf("Reading jobs failed @ starttime dirs: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, startTimeDir := range startTimeDirs {
|
||||
if startTimeDir.IsDir() {
|
||||
job, err := loadJobMeta(filepath.Join(dirpath, startTimeDir.Name(), "meta.json"))
|
||||
if err != nil {
|
||||
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
|
||||
log.Errorf("error in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
|
||||
} else {
|
||||
ch <- job
|
||||
}
|
||||
@ -175,12 +176,15 @@ func (fsa *FsArchive) StoreJobMeta(jobMeta *schema.JobMeta) error {
|
||||
}
|
||||
f, err := os.Create(getPath(&job, fsa.path, "meta.json"))
|
||||
if err != nil {
|
||||
log.Error("Error while creating filepath for meta.json")
|
||||
return err
|
||||
}
|
||||
if err := EncodeJobMeta(f, jobMeta); err != nil {
|
||||
log.Error("Error while encoding job metadata to meta.json file")
|
||||
return err
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
log.Warn("Error while closing meta.json file")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -203,26 +207,38 @@ func (fsa *FsArchive) ImportJob(
|
||||
}
|
||||
dir := getPath(&job, fsa.path, "")
|
||||
if err := os.MkdirAll(dir, 0777); err != nil {
|
||||
log.Error("Error while creating job archive path")
|
||||
return err
|
||||
}
|
||||
|
||||
f, err := os.Create(path.Join(dir, "meta.json"))
|
||||
if err != nil {
|
||||
log.Error("Error while creating filepath for meta.json")
|
||||
return err
|
||||
}
|
||||
if err := EncodeJobMeta(f, jobMeta); err != nil {
|
||||
log.Error("Error while encoding job metadata to meta.json file")
|
||||
return err
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
log.Warn("Error while closing meta.json file")
|
||||
return err
|
||||
}
|
||||
|
||||
f, err = os.Create(path.Join(dir, "data.json"))
|
||||
if err != nil {
|
||||
log.Error("Error while creating filepath for data.json")
|
||||
return err
|
||||
}
|
||||
if err := EncodeJobData(f, jobData); err != nil {
|
||||
log.Error("Error while encoding job metricdata to data.json file")
|
||||
return err
|
||||
}
|
||||
return f.Close()
|
||||
if err := f.Close(); err != nil {
|
||||
log.Warn("Error while closing data.json file")
|
||||
return err
|
||||
}
|
||||
|
||||
// no error: final return is nil
|
||||
return nil
|
||||
}
|
||||
|
@ -10,9 +10,14 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
func init() {
|
||||
log.Init("info", true)
|
||||
}
|
||||
|
||||
func TestInitEmptyPath(t *testing.T) {
|
||||
var fsa FsArchive
|
||||
err := fsa.Init(json.RawMessage("{\"kind\":\"../../test/archive\"}"))
|
||||
|
@ -10,12 +10,14 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
)
|
||||
|
||||
func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
|
||||
data := cache.Get(k, func() (value interface{}, ttl time.Duration, size int) {
|
||||
var d schema.JobData
|
||||
if err := json.NewDecoder(r).Decode(&d); err != nil {
|
||||
log.Warn("Error while decoding raw job data json")
|
||||
return err, 0, 1000
|
||||
}
|
||||
|
||||
@ -23,6 +25,7 @@ func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
|
||||
})
|
||||
|
||||
if err, ok := data.(error); ok {
|
||||
log.Warn("Error in decoded job data set")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -32,6 +35,7 @@ func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
|
||||
func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
|
||||
var d schema.JobMeta
|
||||
if err := json.NewDecoder(r).Decode(&d); err != nil {
|
||||
log.Warn("Error while decoding raw job meta json")
|
||||
return &d, err
|
||||
}
|
||||
|
||||
@ -43,6 +47,7 @@ func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
|
||||
func DecodeCluster(r io.Reader) (*schema.Cluster, error) {
|
||||
var c schema.Cluster
|
||||
if err := json.NewDecoder(r).Decode(&c); err != nil {
|
||||
log.Warn("Error while decoding raw cluster json")
|
||||
return &c, err
|
||||
}
|
||||
|
||||
@ -54,6 +59,7 @@ func DecodeCluster(r io.Reader) (*schema.Cluster, error) {
|
||||
func EncodeJobData(w io.Writer, d *schema.JobData) error {
|
||||
// Sanitize parameters
|
||||
if err := json.NewEncoder(w).Encode(d); err != nil {
|
||||
log.Warn("Error while encoding new job data json")
|
||||
return err
|
||||
}
|
||||
|
||||
@ -63,6 +69,7 @@ func EncodeJobData(w io.Writer, d *schema.JobData) error {
|
||||
func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error {
|
||||
// Sanitize parameters
|
||||
if err := json.NewEncoder(w).Encode(d); err != nil {
|
||||
log.Warn("Error while encoding new job meta json")
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -64,7 +64,7 @@ type NLExprIntRange struct {
|
||||
|
||||
func (nle NLExprIntRange) consume(input string) (next string, ok bool) {
|
||||
if !nle.zeroPadded || nle.digits < 1 {
|
||||
log.Error("node list: only zero-padded ranges are allowed")
|
||||
log.Error("only zero-padded ranges are allowed")
|
||||
return "", false
|
||||
}
|
||||
|
||||
@ -102,7 +102,7 @@ func ParseNodeList(raw string) (NodeList, error) {
|
||||
i++
|
||||
}
|
||||
if i == len(raw) {
|
||||
return nil, fmt.Errorf("node list: unclosed '['")
|
||||
return nil, fmt.Errorf("ARCHIVE/NODELIST > unclosed '['")
|
||||
}
|
||||
} else if raw[i] == ',' {
|
||||
rawterms = append(rawterms, raw[prevterm:i])
|
||||
@ -135,7 +135,7 @@ func ParseNodeList(raw string) (NodeList, error) {
|
||||
end := strings.Index(rawterm[i:], "]")
|
||||
|
||||
if end == -1 {
|
||||
return nil, fmt.Errorf("node list: unclosed '['")
|
||||
return nil, fmt.Errorf("ARCHIVE/NODELIST > unclosed '['")
|
||||
}
|
||||
|
||||
parts := strings.Split(rawterm[i+1:i+end], ",")
|
||||
@ -144,21 +144,21 @@ func ParseNodeList(raw string) (NodeList, error) {
|
||||
for _, part := range parts {
|
||||
minus := strings.Index(part, "-")
|
||||
if minus == -1 {
|
||||
return nil, fmt.Errorf("node list: no '-' found inside '[...]'")
|
||||
return nil, fmt.Errorf("ARCHIVE/NODELIST > no '-' found inside '[...]'")
|
||||
}
|
||||
|
||||
s1, s2 := part[0:minus], part[minus+1:]
|
||||
if len(s1) != len(s2) || len(s1) == 0 {
|
||||
return nil, fmt.Errorf("node list: %#v and %#v are not of equal length or of length zero", s1, s2)
|
||||
return nil, fmt.Errorf("ARCHIVE/NODELIST > %v and %v are not of equal length or of length zero", s1, s2)
|
||||
}
|
||||
|
||||
x1, err := strconv.ParseInt(s1, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("node list: %w", err)
|
||||
return nil, fmt.Errorf("ARCHIVE/NODELIST > could not parse int: %w", err)
|
||||
}
|
||||
x2, err := strconv.ParseInt(s2, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("node list: %w", err)
|
||||
return nil, fmt.Errorf("ARCHIVE/NODELIST > could not parse int: %w", err)
|
||||
}
|
||||
|
||||
nles = append(nles, NLExprIntRange{
|
||||
@ -172,7 +172,7 @@ func ParseNodeList(raw string) (NodeList, error) {
|
||||
exprs = append(exprs, nles)
|
||||
i += end
|
||||
} else {
|
||||
return nil, fmt.Errorf("node list: invalid character: %#v", rune(c))
|
||||
return nil, fmt.Errorf("ARCHIVE/NODELIST > invalid character: %#v", rune(c))
|
||||
}
|
||||
}
|
||||
nl = append(nl, exprs)
|
||||
|
181
pkg/log/log.go
181
pkg/log/log.go
@ -12,8 +12,8 @@ import (
|
||||
)
|
||||
|
||||
// Provides a simple way of logging with different levels.
|
||||
// Time/Data are not logged on purpose because systemd adds
|
||||
// them for us.
|
||||
// Time/Date are not logged because systemd adds
|
||||
// them for us (Default, can be changed by flag '--logdate true').
|
||||
//
|
||||
// Uses these prefixes: https://www.freedesktop.org/software/systemd/man/sd-daemon.html
|
||||
|
||||
@ -22,109 +22,162 @@ var (
|
||||
InfoWriter io.Writer = os.Stderr
|
||||
WarnWriter io.Writer = os.Stderr
|
||||
ErrWriter io.Writer = os.Stderr
|
||||
CritWriter io.Writer = os.Stderr
|
||||
)
|
||||
|
||||
var (
|
||||
DebugPrefix string = "<7>[DEBUG] "
|
||||
InfoPrefix string = "<6>[INFO] "
|
||||
WarnPrefix string = "<4>[WARNING] "
|
||||
ErrPrefix string = "<3>[ERROR] "
|
||||
DebugPrefix string = "<7>[DEBUG] "
|
||||
InfoPrefix string = "<6>[INFO] "
|
||||
WarnPrefix string = "<4>[WARNING] "
|
||||
ErrPrefix string = "<3>[ERROR] "
|
||||
CritPrefix string = "<2>[CRITICAL] "
|
||||
)
|
||||
|
||||
var (
|
||||
DebugLog *log.Logger = log.New(DebugWriter, DebugPrefix, 0)
|
||||
InfoLog *log.Logger = log.New(InfoWriter, InfoPrefix, 0)
|
||||
WarnLog *log.Logger = log.New(WarnWriter, WarnPrefix, 0)
|
||||
ErrLog *log.Logger = log.New(ErrWriter, ErrPrefix, 0)
|
||||
DebugLog *log.Logger
|
||||
InfoLog *log.Logger
|
||||
WarnLog *log.Logger
|
||||
ErrLog *log.Logger
|
||||
CritLog *log.Logger
|
||||
)
|
||||
|
||||
func init() {
|
||||
if lvl, ok := os.LookupEnv("LOGLEVEL"); ok {
|
||||
switch lvl {
|
||||
case "err", "fatal":
|
||||
WarnWriter = io.Discard
|
||||
fallthrough
|
||||
case "warn":
|
||||
InfoWriter = io.Discard
|
||||
fallthrough
|
||||
case "info":
|
||||
DebugWriter = io.Discard
|
||||
case "debug":
|
||||
// Nothing to do...
|
||||
default:
|
||||
Warnf("environment variable LOGLEVEL has invalid value %#v", lvl)
|
||||
}
|
||||
/* CONFIG */
|
||||
|
||||
func Init(lvl string, logdate bool) {
|
||||
switch lvl {
|
||||
case "crit":
|
||||
ErrWriter = io.Discard
|
||||
fallthrough
|
||||
case "err", "fatal":
|
||||
WarnWriter = io.Discard
|
||||
fallthrough
|
||||
case "warn":
|
||||
InfoWriter = io.Discard
|
||||
fallthrough
|
||||
case "info":
|
||||
DebugWriter = io.Discard
|
||||
case "debug":
|
||||
// Nothing to do...
|
||||
break
|
||||
default:
|
||||
fmt.Printf("pkg/log: Flag 'loglevel' has invalid value %#v\npkg/log: Will use default loglevel 'debug'\n", lvl)
|
||||
//SetLogLevel("debug")
|
||||
}
|
||||
|
||||
if !logdate {
|
||||
DebugLog = log.New(DebugWriter, DebugPrefix, 0)
|
||||
InfoLog = log.New(InfoWriter, InfoPrefix, log.Lshortfile)
|
||||
WarnLog = log.New(WarnWriter, WarnPrefix, log.Lshortfile)
|
||||
ErrLog = log.New(ErrWriter, ErrPrefix, log.Llongfile)
|
||||
CritLog = log.New(CritWriter, CritPrefix, log.Llongfile)
|
||||
} else {
|
||||
DebugLog = log.New(DebugWriter, DebugPrefix, log.LstdFlags)
|
||||
InfoLog = log.New(InfoWriter, InfoPrefix, log.LstdFlags|log.Lshortfile)
|
||||
WarnLog = log.New(WarnWriter, WarnPrefix, log.LstdFlags|log.Lshortfile)
|
||||
ErrLog = log.New(ErrWriter, ErrPrefix, log.LstdFlags|log.Llongfile)
|
||||
CritLog = log.New(CritWriter, CritPrefix, log.LstdFlags|log.Llongfile)
|
||||
}
|
||||
}
|
||||
|
||||
func Debug(v ...interface{}) {
|
||||
if DebugWriter != io.Discard {
|
||||
DebugLog.Print(v...)
|
||||
}
|
||||
}
|
||||
|
||||
func Info(v ...interface{}) {
|
||||
if InfoWriter != io.Discard {
|
||||
InfoLog.Print(v...)
|
||||
}
|
||||
/* PRINT */
|
||||
|
||||
// Private helper
|
||||
func printStr(v ...interface{}) string {
|
||||
return fmt.Sprint(v...)
|
||||
}
|
||||
|
||||
// Uses Info() -> If errorpath required at some point:
|
||||
// Will need own writer with 'Output(2, out)' to correctly render path
|
||||
func Print(v ...interface{}) {
|
||||
Info(v...)
|
||||
}
|
||||
|
||||
func Debug(v ...interface{}) {
|
||||
DebugLog.Output(2, printStr(v...))
|
||||
}
|
||||
|
||||
func Info(v ...interface{}) {
|
||||
InfoLog.Output(2, printStr(v...))
|
||||
}
|
||||
|
||||
func Warn(v ...interface{}) {
|
||||
if WarnWriter != io.Discard {
|
||||
WarnLog.Print(v...)
|
||||
}
|
||||
WarnLog.Output(2, printStr(v...))
|
||||
}
|
||||
|
||||
func Error(v ...interface{}) {
|
||||
if ErrWriter != io.Discard {
|
||||
ErrLog.Print(v...)
|
||||
}
|
||||
ErrLog.Output(2, printStr(v...))
|
||||
}
|
||||
|
||||
// Writes panic stacktrace, but keeps application alive
|
||||
func Panic(v ...interface{}) {
|
||||
ErrLog.Output(2, printStr(v...))
|
||||
panic("Panic triggered ...")
|
||||
}
|
||||
|
||||
func Crit(v ...interface{}) {
|
||||
CritLog.Output(2, printStr(v...))
|
||||
}
|
||||
|
||||
// Writes critical log, stops application
|
||||
func Fatal(v ...interface{}) {
|
||||
Error(v...)
|
||||
CritLog.Output(2, printStr(v...))
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
func Debugf(format string, v ...interface{}) {
|
||||
if DebugWriter != io.Discard {
|
||||
DebugLog.Printf(format, v...)
|
||||
}
|
||||
}
|
||||
|
||||
func Infof(format string, v ...interface{}) {
|
||||
if InfoWriter != io.Discard {
|
||||
InfoLog.Printf(format, v...)
|
||||
}
|
||||
/* PRINT FORMAT*/
|
||||
|
||||
// Private helper
|
||||
func printfStr(format string, v ...interface{}) string {
|
||||
return fmt.Sprintf(format, v...)
|
||||
}
|
||||
|
||||
// Uses Infof() -> If errorpath required at some point:
|
||||
// Will need own writer with 'Output(2, out)' to correctly render path
|
||||
func Printf(format string, v ...interface{}) {
|
||||
Infof(format, v...)
|
||||
}
|
||||
|
||||
func Finfof(w io.Writer, format string, v ...interface{}) {
|
||||
if w != io.Discard {
|
||||
fmt.Fprintf(InfoWriter, InfoPrefix+format+"\n", v...)
|
||||
}
|
||||
func Debugf(format string, v ...interface{}) {
|
||||
DebugLog.Output(2, printfStr(format, v...))
|
||||
}
|
||||
|
||||
func Infof(format string, v ...interface{}) {
|
||||
InfoLog.Output(2, printfStr(format, v...))
|
||||
}
|
||||
|
||||
func Warnf(format string, v ...interface{}) {
|
||||
if WarnWriter != io.Discard {
|
||||
WarnLog.Printf(format, v...)
|
||||
}
|
||||
WarnLog.Output(2, printfStr(format, v...))
|
||||
}
|
||||
|
||||
func Errorf(format string, v ...interface{}) {
|
||||
if ErrWriter != io.Discard {
|
||||
ErrLog.Printf(format, v...)
|
||||
}
|
||||
ErrLog.Output(2, printfStr(format, v...))
|
||||
}
|
||||
|
||||
// Writes panic stacktrace, but keeps application alive
|
||||
func Panicf(format string, v ...interface{}) {
|
||||
ErrLog.Output(2, printfStr(format, v...))
|
||||
panic("Panic triggered ...")
|
||||
}
|
||||
|
||||
func Critf(format string, v ...interface{}) {
|
||||
CritLog.Output(2, printfStr(format, v...))
|
||||
}
|
||||
|
||||
// Writes crit log, stops application
|
||||
func Fatalf(format string, v ...interface{}) {
|
||||
Errorf(format, v...)
|
||||
CritLog.Output(2, printfStr(format, v...))
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
/* SPECIAL */
|
||||
|
||||
// func Finfof(w io.Writer, format string, v ...interface{}) {
|
||||
// if w != io.Discard {
|
||||
// if logDateTime {
|
||||
// currentTime := time.Now()
|
||||
// fmt.Fprintf(InfoWriter, currentTime.String()+InfoPrefix+format+"\n", v...)
|
||||
// } else {
|
||||
// fmt.Fprintf(InfoWriter, InfoPrefix+format+"\n", v...)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
@ -69,7 +69,7 @@ func (c *Cache) Get(key string, computeValue ComputeValue) interface{} {
|
||||
if now.After(entry.expiration) {
|
||||
if !c.evictEntry(entry) {
|
||||
if entry.expiration.IsZero() {
|
||||
panic("cache entry that shoud have been waited for could not be evicted.")
|
||||
panic("LRUCACHE/CACHE > cache entry that shoud have been waited for could not be evicted.")
|
||||
}
|
||||
c.mutex.Unlock()
|
||||
return entry.value
|
||||
@ -208,7 +208,7 @@ func (c *Cache) Keys(f func(key string, val interface{})) {
|
||||
size := 0
|
||||
for key, e := range c.entries {
|
||||
if key != e.key {
|
||||
panic("key mismatch")
|
||||
panic("LRUCACHE/CACHE > key mismatch")
|
||||
}
|
||||
|
||||
if now.After(e.expiration) {
|
||||
@ -219,13 +219,13 @@ func (c *Cache) Keys(f func(key string, val interface{})) {
|
||||
|
||||
if e.prev != nil {
|
||||
if e.prev.next != e {
|
||||
panic("list corrupted")
|
||||
panic("LRUCACHE/CACHE > list corrupted")
|
||||
}
|
||||
}
|
||||
|
||||
if e.next != nil {
|
||||
if e.next.prev != e {
|
||||
panic("list corrupted")
|
||||
panic("LRUCACHE/CACHE > list corrupted")
|
||||
}
|
||||
}
|
||||
|
||||
@ -234,18 +234,18 @@ func (c *Cache) Keys(f func(key string, val interface{})) {
|
||||
}
|
||||
|
||||
if size != c.usedmemory {
|
||||
panic("size calculations failed")
|
||||
panic("LRUCACHE/CACHE > size calculations failed")
|
||||
}
|
||||
|
||||
if c.head != nil {
|
||||
if c.tail == nil || c.head.prev != nil {
|
||||
panic("head/tail corrupted")
|
||||
panic("LRUCACHE/CACHE > head/tail corrupted")
|
||||
}
|
||||
}
|
||||
|
||||
if c.tail != nil {
|
||||
if c.head == nil || c.tail.next != nil {
|
||||
panic("head/tail corrupted")
|
||||
panic("LRUCACHE/CACHE > head/tail corrupted")
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -281,7 +281,7 @@ func (c *Cache) unlinkEntry(e *cacheEntry) {
|
||||
|
||||
func (c *Cache) evictEntry(e *cacheEntry) bool {
|
||||
if e.waitingForComputation != 0 {
|
||||
// panic("cannot evict this entry as other goroutines need the value")
|
||||
// panic("LRUCACHE/CACHE > cannot evict this entry as other goroutines need the value")
|
||||
return false
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,8 @@ import (
|
||||
"io"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
)
|
||||
|
||||
// A custom float type is used so that (Un)MarshalJSON and
|
||||
@ -43,6 +45,7 @@ func (f *Float) UnmarshalJSON(input []byte) error {
|
||||
|
||||
val, err := strconv.ParseFloat(s, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing custom float")
|
||||
return err
|
||||
}
|
||||
*f = Float(val)
|
||||
|
@ -133,12 +133,12 @@ const (
|
||||
func (e *JobState) UnmarshalGQL(v interface{}) error {
|
||||
str, ok := v.(string)
|
||||
if !ok {
|
||||
return fmt.Errorf("enums must be strings")
|
||||
return fmt.Errorf("SCHEMA/JOB > enums must be strings")
|
||||
}
|
||||
|
||||
*e = JobState(str)
|
||||
if !e.Valid() {
|
||||
return errors.New("invalid job state")
|
||||
return errors.New("SCHEMA/JOB > invalid job state")
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -92,12 +92,12 @@ func (e *MetricScope) Max(other MetricScope) MetricScope {
|
||||
func (e *MetricScope) UnmarshalGQL(v interface{}) error {
|
||||
str, ok := v.(string)
|
||||
if !ok {
|
||||
return fmt.Errorf("enums must be strings")
|
||||
return fmt.Errorf("SCHEMA/METRICS > enums must be strings")
|
||||
}
|
||||
|
||||
*e = MetricScope(str)
|
||||
if !e.Valid() {
|
||||
return fmt.Errorf("%s is not a valid MetricScope", str)
|
||||
return fmt.Errorf("SCHEMA/METRICS > %s is not a valid MetricScope", str)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -303,7 +303,7 @@ func (jm *JobMetric) AddPercentiles(ps []int) bool {
|
||||
|
||||
for _, p := range ps {
|
||||
if p < 1 || p > 99 {
|
||||
panic("invalid percentile")
|
||||
panic("SCHEMA/METRICS > invalid percentile")
|
||||
}
|
||||
|
||||
if _, ok := jm.StatisticsSeries.Percentiles[p]; ok {
|
||||
|
@ -45,21 +45,22 @@ func Validate(k Kind, r io.Reader) (err error) {
|
||||
case Config:
|
||||
s, err = jsonschema.Compile("embedfs://config.schema.json")
|
||||
default:
|
||||
return fmt.Errorf("unkown schema kind ")
|
||||
return fmt.Errorf("SCHEMA/VALIDATE > unkown schema kind: %#v", k)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Error while compiling json schema for kind '%#v'", k)
|
||||
return err
|
||||
}
|
||||
|
||||
var v interface{}
|
||||
if err := json.NewDecoder(r).Decode(&v); err != nil {
|
||||
log.Errorf("schema.Validate() - Failed to decode %v", err)
|
||||
log.Warnf("Error while decoding raw json schema: %#v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
if err = s.Validate(v); err != nil {
|
||||
return fmt.Errorf("%#v", err)
|
||||
return fmt.Errorf("SCHEMA/VALIDATE > %#v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -192,7 +192,7 @@ func GetUnitUnitFactor(in Unit, out Unit) (func(value interface{}) interface{},
|
||||
} else if in.getMeasure() == TemperatureF && out.getMeasure() == TemperatureC {
|
||||
return convertTempF2TempC, nil
|
||||
} else if in.getMeasure() != out.getMeasure() || in.getUnitDenominator() != out.getUnitDenominator() {
|
||||
return func(value interface{}) interface{} { return 1.0 }, fmt.Errorf("invalid measures in in and out Unit")
|
||||
return func(value interface{}) interface{} { return 1.0 }, fmt.Errorf("UNITS/UNITS > invalid measures in in and out Unit")
|
||||
}
|
||||
return GetPrefixPrefixFactor(in.getPrefix(), out.getPrefix()), nil
|
||||
}
|
||||
|
@ -11,7 +11,6 @@ else
|
||||
tar xJf job-archive-dev.tar.xz
|
||||
rm ./job-archive-dev.tar.xz
|
||||
|
||||
touch ./job.db
|
||||
cd ../web/frontend
|
||||
yarn install
|
||||
yarn build
|
||||
@ -21,5 +20,6 @@ else
|
||||
cp ./docs/config.json config.json
|
||||
go build ./cmd/cc-backend
|
||||
|
||||
./cc-backend --migrate-db
|
||||
./cc-backend --server --dev --init-db --add-user demo:admin:AdminDev
|
||||
fi
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/gorilla/mux"
|
||||
|
||||
@ -245,6 +246,7 @@ func setup(t *testing.T) *api.RestApi {
|
||||
]
|
||||
}`
|
||||
|
||||
log.Init("info", true)
|
||||
tmpdir := t.TempDir()
|
||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||
if err := os.Mkdir(jobarchive, 0777); err != nil {
|
||||
@ -267,11 +269,7 @@ func setup(t *testing.T) *api.RestApi {
|
||||
t.Fatal(err)
|
||||
}
|
||||
dbfilepath := filepath.Join(tmpdir, "test.db")
|
||||
f, err := os.Create(dbfilepath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
f.Close()
|
||||
repository.MigrateDB("sqlite3", dbfilepath)
|
||||
|
||||
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
|
||||
@ -292,10 +290,6 @@ func setup(t *testing.T) *api.RestApi {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := db.DB.Exec(repository.JobsDBSchema); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
jobRepo := repository.GetJobRepository()
|
||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
<script>
|
||||
import { Icon, Button, InputGroup, Input, Collapse,
|
||||
Navbar, NavbarBrand, Nav, NavItem, NavLink, NavbarToggler,
|
||||
Dropdown, DropdownToggle, DropdownMenu, DropdownItem } from 'sveltestrap'
|
||||
Dropdown, DropdownToggle, DropdownMenu, DropdownItem, InputGroupText } from 'sveltestrap'
|
||||
|
||||
export let username // empty string if auth. is disabled, otherwise the username as string
|
||||
export let authlevel // integer
|
||||
@ -93,6 +93,7 @@
|
||||
<InputGroup>
|
||||
<Input type="text" placeholder={(authlevel >= 4) ? "Search jobId / username" : "Search jobId"} name="searchId"/>
|
||||
<Button outline type="submit"><Icon name="search"/></Button>
|
||||
<InputGroupText style="cursor:help;" title={isAdmin ? "Example: 'projectId:a100cd', Types are: jobId | jobName | projectId | username" | "name" : "Example: 'jobName:myjob', Types are jobId | jobName"}><Icon name="info-circle"/></InputGroupText>
|
||||
</InputGroup>
|
||||
</form>
|
||||
{#if username}
|
||||
|
@ -20,6 +20,7 @@
|
||||
const stats = operationStore(`query($filter: [JobFilter!]!) {
|
||||
rows: jobsStatistics(filter: $filter, groupBy: ${type}) {
|
||||
id
|
||||
name
|
||||
totalJobs
|
||||
totalWalltime
|
||||
totalCoreHours
|
||||
@ -93,6 +94,15 @@
|
||||
<Icon name="sort-numeric-down" />
|
||||
</Button>
|
||||
</th>
|
||||
{#if type == 'USER'}
|
||||
<th scope="col">
|
||||
Name
|
||||
<Button color="{sorting.field == 'name' ? 'primary' : 'light'}"
|
||||
size="sm" on:click={e => changeSorting(e, 'name')}>
|
||||
<Icon name="sort-numeric-down" />
|
||||
</Button>
|
||||
</th>
|
||||
{/if}
|
||||
<th scope="col">
|
||||
Total Jobs
|
||||
<Button color="{sorting.field == 'totalJobs' ? 'primary' : 'light'}"
|
||||
@ -137,6 +147,9 @@
|
||||
{row.id}
|
||||
{/if}
|
||||
</td>
|
||||
{#if type == 'USER'}
|
||||
<td>{row?.name ? row.name : ''}</td>
|
||||
{/if}
|
||||
<td>{row.totalJobs}</td>
|
||||
<td>{row.totalWalltime}</td>
|
||||
<td>{row.totalCoreHours}</td>
|
||||
|
@ -35,16 +35,17 @@
|
||||
projectMatch: filterPresets.projectMatch || 'contains',
|
||||
userMatch: filterPresets.userMatch || 'contains',
|
||||
|
||||
cluster: filterPresets.cluster || null,
|
||||
partition: filterPresets.partition || null,
|
||||
states: filterPresets.states || filterPresets.state ? [filterPresets.state].flat() : allJobStates,
|
||||
startTime: filterPresets.startTime || { from: null, to: null },
|
||||
tags: filterPresets.tags || [],
|
||||
duration: filterPresets.duration || { from: null, to: null },
|
||||
jobId: filterPresets.jobId || '',
|
||||
arrayJobId: filterPresets.arrayJobId || null,
|
||||
user: filterPresets.user || '',
|
||||
project: filterPresets.project || '',
|
||||
cluster: filterPresets.cluster || null,
|
||||
partition: filterPresets.partition || null,
|
||||
states: filterPresets.states || filterPresets.state ? [filterPresets.state].flat() : allJobStates,
|
||||
startTime: filterPresets.startTime || { from: null, to: null },
|
||||
tags: filterPresets.tags || [],
|
||||
duration: filterPresets.duration || { from: null, to: null },
|
||||
jobId: filterPresets.jobId || '',
|
||||
arrayJobId: filterPresets.arrayJobId || null,
|
||||
user: filterPresets.user || '',
|
||||
project: filterPresets.project || '',
|
||||
jobName: filterPresets.jobName || '',
|
||||
|
||||
numNodes: filterPresets.numNodes || { from: null, to: null },
|
||||
numHWThreads: filterPresets.numHWThreads || { from: null, to: null },
|
||||
@ -94,6 +95,8 @@
|
||||
items.push({ user: { [filters.userMatch]: filters.user } })
|
||||
if (filters.project)
|
||||
items.push({ project: { [filters.projectMatch]: filters.project } })
|
||||
if (filters.jobName)
|
||||
items.push({ jobName: { contains: filters.jobName } })
|
||||
for (let stat of filters.stats)
|
||||
items.push({ [stat.field]: { from: stat.from, to: stat.to } })
|
||||
|
||||
@ -123,12 +126,19 @@
|
||||
opts.push(`numNodes=${filters.numNodes.from}-${filters.numNodes.to}`)
|
||||
if (filters.numAccelerators.from && filters.numAccelerators.to)
|
||||
opts.push(`numAccelerators=${filters.numAccelerators.from}-${filters.numAccelerators.to}`)
|
||||
if (filters.user)
|
||||
opts.push(`user=${filters.user}`)
|
||||
if (filters.user.length != 0)
|
||||
if (filters.userMatch != 'in') {
|
||||
opts.push(`user=${filters.user}`)
|
||||
} else {
|
||||
for (let singleUser of filters.user)
|
||||
opts.push(`user=${singleUser}`)
|
||||
}
|
||||
if (filters.userMatch != 'contains')
|
||||
opts.push(`userMatch=${filters.userMatch}`)
|
||||
if (filters.project)
|
||||
opts.push(`project=${filters.project}`)
|
||||
if (filters.jobName)
|
||||
opts.push(`jobName=${filters.jobName}`)
|
||||
if (filters.projectMatch != 'contains')
|
||||
opts.push(`projectMatch=${filters.projectMatch}`)
|
||||
|
||||
|
@ -52,7 +52,10 @@
|
||||
{/if}
|
||||
{#if job.project && job.project != 'no project'}
|
||||
<br/>
|
||||
<Icon name="people-fill"/> {job.project}
|
||||
<Icon name="people-fill"/>
|
||||
<a class="fst-italic" href="/monitoring/jobs/?project={job.project}&projectMatch=eq" target="_blank">
|
||||
{scrambleNames ? scramble(job.project) : job.project}
|
||||
</a>
|
||||
{/if}
|
||||
</p>
|
||||
|
||||
|
@ -33,7 +33,7 @@
|
||||
query($filter: [JobFilter!]!, $sorting: OrderByInput!, $paging: PageRequest! ){
|
||||
jobs(filter: $filter, order: $sorting, page: $paging) {
|
||||
items {
|
||||
id, jobId, user, project, cluster, subCluster, startTime,
|
||||
id, jobId, user, project, jobName, cluster, subCluster, startTime,
|
||||
duration, numNodes, numHWThreads, numAcc, walltime,
|
||||
SMT, exclusive, partition, arrayJobId,
|
||||
monitoringStatus, state,
|
||||
|
@ -230,7 +230,7 @@ commondir@^1.0.1:
|
||||
concat-map@0.0.1:
|
||||
version "0.0.1"
|
||||
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
|
||||
integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
|
||||
integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==
|
||||
|
||||
deepmerge@^4.2.2:
|
||||
version "4.2.2"
|
||||
@ -365,9 +365,9 @@ merge-stream@^2.0.0:
|
||||
integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==
|
||||
|
||||
minimatch@^3.0.4:
|
||||
version "3.0.4"
|
||||
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"
|
||||
integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==
|
||||
version "3.1.2"
|
||||
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b"
|
||||
integrity sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==
|
||||
dependencies:
|
||||
brace-expansion "^1.1.7"
|
||||
|
||||
|
@ -24,6 +24,7 @@ var frontendFiles embed.FS
|
||||
func ServeFiles() http.Handler {
|
||||
publicFiles, err := fs.Sub(frontendFiles, "frontend/public")
|
||||
if err != nil {
|
||||
log.Fatalf("WEB/WEB > cannot find frontend public files")
|
||||
panic(err)
|
||||
}
|
||||
return http.FileServer(http.FS(publicFiles))
|
||||
@ -47,6 +48,7 @@ func init() {
|
||||
templates[strings.TrimPrefix(path, "templates/")] = template.Must(template.Must(base.Clone()).ParseFS(templateFiles, path))
|
||||
return nil
|
||||
}); err != nil {
|
||||
log.Fatalf("WEB/WEB > cannot find frontend template files")
|
||||
panic(err)
|
||||
}
|
||||
|
||||
@ -79,6 +81,7 @@ type Page struct {
|
||||
func RenderTemplate(rw http.ResponseWriter, r *http.Request, file string, page *Page) {
|
||||
t, ok := templates[file]
|
||||
if !ok {
|
||||
log.Fatalf("WEB/WEB > template '%s' not found", file)
|
||||
panic("template not found")
|
||||
}
|
||||
|
||||
@ -88,8 +91,8 @@ func RenderTemplate(rw http.ResponseWriter, r *http.Request, file string, page *
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("%v\n", page.Config)
|
||||
log.Infof("Page config : %v\n", page.Config)
|
||||
if err := t.Execute(rw, page); err != nil {
|
||||
log.Errorf("template error: %s", err.Error())
|
||||
log.Errorf("Template error: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user