Merge branch 'master' into 40_45_82_update_roles

Christoph Kluge 2023-02-21 17:17:41 +01:00
commit e0e51813ad
66 changed files with 3132 additions and 826 deletions

@@ -30,6 +30,7 @@ SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
$(TARGET): $(VAR) $(SVELTE_TARGETS)
$(info ===> BUILD cc-backend)
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
+./cc-backend --migrate-db
clean:
$(info ===> CLEAN)
@@ -48,5 +49,4 @@ $(SVELTE_TARGETS): $(SVELTE_SRC)
$(VAR):
@mkdir $(VAR)
-@touch ./var/job.db
cd web/frontend && yarn install

@@ -98,6 +98,15 @@ A config file in the JSON format has to be provided using `--config` to override
By default, if there is a `config.json` file in the current directory of the `cc-backend` process, it will be loaded even without the `--config` flag.
You find documentation of all supported configuration and command line options [here](./configs.README.md).
+## Database initialization and migration
+Every cc-backend version supports a specific database version.
+On startup the version of the sqlite database is validated and cc-backend will terminate if the version does not match.
+cc-backend can migrate the database schema up to the required version using the `--migrate-db` command line option.
+If the database file does not yet exist, it is created and initialized by the `--migrate-db` command line option.
+If you want to use a newer database version with an older version of cc-backend, you can downgrade the database using the external [migrate](https://github.com/golang-migrate/migrate) tool.
+In this case you have to provide the path to the migration files in a recent source tree: `./internal/repository/migrations/`.
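A minimal sketch of what such a `MigrateDB` helper can look like on top of the golang-migrate library that this merge adds to `go.mod`. The schema version constant, the sqlite-only handling, and the migration path below are illustrative assumptions, not the actual `repository.MigrateDB` implementation:

```go
package repository

import (
	"log"

	"github.com/golang-migrate/migrate/v4"
	_ "github.com/golang-migrate/migrate/v4/database/sqlite3" // sqlite3 database driver
	_ "github.com/golang-migrate/migrate/v4/source/file"      // read migration files from disk
)

// Version is the database schema version this build expects (illustrative value).
const Version uint = 4

// MigrateDB upgrades (or initializes) the database to the supported schema version.
func MigrateDB(backend string, db string) {
	if backend != "sqlite3" {
		log.Fatalf("this sketch only covers the sqlite3 driver, got %s", backend)
	}
	// Opening a non-existing sqlite file creates it, which is why --migrate-db
	// also serves as first-time initialization (see the paragraph above).
	m, err := migrate.New("file://internal/repository/migrations", "sqlite3://"+db)
	if err != nil {
		log.Fatal(err)
	}
	if err := m.Migrate(Version); err != nil && err != migrate.ErrNoChange {
		log.Fatal(err)
	}
	srcErr, dbErr := m.Close()
	if srcErr != nil || dbErr != nil {
		log.Fatalf("closing migration handles failed: %v / %v", srcErr, dbErr)
	}
}
```

Downgrading with the external `migrate` CLI, as described above, works against the same migration files in `./internal/repository/migrations/`, only with a lower target version.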
## Development
In case the REST or GraphQL API is changed, the corresponding code generators have to be used.

@@ -10,6 +10,7 @@ type Job {
jobId: Int!
user: String!
project: String!
+jobName: String
cluster: String!
subCluster: String!
startTime: Time!
@@ -197,14 +198,15 @@ type IntRangeOutput { from: Int!, to: Int! }
type TimeRangeOutput { from: Time!, to: Time! }
input JobFilter {
tags: [ID!]
jobId: StringInput
arrayJobId: Int
user: StringInput
project: StringInput
+jobName: StringInput
cluster: StringInput
partition: StringInput
duration: IntRange
minRunningFor: Int
@@ -235,6 +237,7 @@ input StringInput {
contains: String
startsWith: String
endsWith: String
+in: [String!]
}
input IntRange { from: Int!, to: Int! }
@@ -255,6 +258,7 @@ type HistoPoint {
type JobsStatistics {
id: ID! # If `groupBy` was used, ID of the user/project/cluster
+name: String # if User-Statistics: Given Name of Account (ID) Owner
totalJobs: Int! # Number of jobs that matched
shortJobs: Int! # Number of jobs with a duration of less than 2 minutes
totalWalltime: Int! # Sum of the duration of all matched jobs in hours

@@ -13,7 +13,6 @@ import (
"io"
"net"
"net/http"
-"net/url"
"os"
"os/signal"
"runtime"
@@ -62,19 +61,22 @@ var (
)
func main() {
-var flagReinitDB, flagServer, flagSyncLDAP, flagGops, flagDev, flagVersion bool
-var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob string
+var flagReinitDB, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagDev, flagVersion, flagLogDateTime bool
+var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
+flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
+flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,api,user]:<password>`")
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
+flag.StringVar(&flagLogLevel, "loglevel", "debug", "Sets the logging level: `[debug (default),info,warn,err,fatal,crit]`")
flag.Parse()
if flagVersion {
@@ -85,6 +87,9 @@ func main() {
os.Exit(0)
}
+// Apply config flags for pkg/log
+log.Init(flagLogLevel, flagLogDateTime)
// See https://github.com/google/gops (Runtime overhead is almost zero)
if flagGops {
if err := agent.Listen(agent.Options{}); err != nil {
@@ -108,6 +113,11 @@ func main() {
config.Keys.DB = os.Getenv(envvar)
}
+if flagMigrateDB {
+repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB)
+os.Exit(0)
+}
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
db := repository.GetConnection()
@@ -118,7 +128,7 @@ func main() {
"ldap": config.Keys.LdapConfig,
"jwt": config.Keys.JwtConfig,
}); err != nil {
-log.Fatal(err)
+log.Fatalf("auth initialization failed: %v", err)
}
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
@@ -135,12 +145,12 @@ func main() {
if err := authentication.AddUser(&auth.User{
Username: parts[0], Projects: emptyPrj, Password: parts[2], Roles: strings.Split(parts[1], ","),
}); err != nil {
-log.Fatal(err)
+log.Fatalf("adding '%s' user authentication failed: %v", parts[0], err)
}
}
if flagDelUser != "" {
if err := authentication.DelUser(flagDelUser); err != nil {
-log.Fatal(err)
+log.Fatalf("deleting user failed: %v", err)
}
}
@@ -150,7 +160,7 @@ func main() {
}
if err := authentication.LdapAuth.Sync(); err != nil {
-log.Fatal(err)
+log.Fatalf("LDAP sync failed: %v", err)
}
log.Info("LDAP sync successfull")
}
@@ -158,41 +168,41 @@ func main() {
if flagGenJWT != "" {
user, err := authentication.GetUser(flagGenJWT)
if err != nil {
-log.Fatal(err)
+log.Fatalf("could not get user from JWT: %v", err)
}
if !user.HasRole(auth.RoleApi) {
-log.Warn("that user does not have the API role")
+log.Warnf("user '%s' does not have the API role", user.Username)
}
jwt, err := authentication.JwtAuth.ProvideJWT(user)
if err != nil {
-log.Fatal(err)
+log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
}
-fmt.Printf("JWT for '%s': %s\n", user.Username, jwt)
+fmt.Printf("MAIN > JWT for '%s': %s\n", user.Username, jwt)
}
} else if flagNewUser != "" || flagDelUser != "" {
log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled")
}
if err := archive.Init(config.Keys.Archive, config.Keys.DisableArchive); err != nil {
-log.Fatal(err)
+log.Fatalf("failed to initialize archive: %s", err.Error())
}
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
-log.Fatal(err)
+log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
}
if flagReinitDB {
if err := repository.InitDB(); err != nil {
-log.Fatal(err)
+log.Fatalf("failed to re-initialize repository DB: %s", err.Error())
}
}
if flagImportJob != "" {
if err := repository.HandleImportFlag(flagImportJob); err != nil {
-log.Fatalf("import failed: %s", err.Error())
+log.Fatalf("job import failed: %s", err.Error())
}
}
@@ -210,12 +220,12 @@ func main() {
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
switch e := err.(type) {
case string:
-return fmt.Errorf("panic: %s", e)
+return fmt.Errorf("MAIN > Panic: %s", e)
case error:
-return fmt.Errorf("panic caused by: %w", e)
+return fmt.Errorf("MAIN > Panic caused by: %w", e)
}
-return errors.New("internal server error (panic)")
+return errors.New("MAIN > Internal server error (panic)")
})
}
@@ -296,28 +306,9 @@ func main() {
}
secured.Handle("/query", graphQLEndpoint)
-// Send a searchId and then reply with a redirect to a user or job.
+// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
-if search := r.URL.Query().Get("searchId"); search != "" {
-job, username, err := api.JobRepository.FindJobOrUser(r.Context(), search)
-if err == repository.ErrNotFound {
-http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(search), http.StatusTemporaryRedirect)
-return
-} else if err != nil {
-http.Error(rw, err.Error(), http.StatusInternalServerError)
-return
-}
-if username != "" {
-http.Redirect(rw, r, "/monitoring/user/"+username, http.StatusTemporaryRedirect)
-return
-} else {
-http.Redirect(rw, r, fmt.Sprintf("/monitoring/job/%d", job), http.StatusTemporaryRedirect)
-return
-}
-} else {
-http.Error(rw, "'searchId' query parameter missing", http.StatusBadRequest)
-}
+routerConfig.HandleSearchBar(rw, r, api)
})
// Mount all /monitoring/... and /api/... routes.
@@ -362,7 +353,7 @@
// Start http or https server
listener, err := net.Listen("tcp", config.Keys.Addr)
if err != nil {
-log.Fatal(err)
+log.Fatalf("starting http listener failed: %v", err)
}
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
@@ -374,7 +365,7 @@
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
if err != nil {
-log.Fatal(err)
+log.Fatalf("loading X509 keypair failed: %v", err)
}
listener = tls.NewListener(listener, &tls.Config{
Certificates: []tls.Certificate{cert},
@@ -392,16 +383,16 @@
// Because this program will want to bind to a privileged port (like 80), the listener must
// be established first, then the user can be changed, and after that,
-// the actuall http server can be started.
+// the actual http server can be started.
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
-log.Fatalf("error while changing user: %s", err.Error())
+log.Fatalf("error while preparing server start: %s", err.Error())
}
wg.Add(1)
go func() {
defer wg.Done()
if err := server.Serve(listener); err != nil && err != http.ErrServerClosed {
-log.Fatal(err)
+log.Fatalf("starting server failed: %v", err)
}
}()
@@ -411,7 +402,7 @@
go func() {
defer wg.Done()
<-sigs
-runtimeEnv.SystemdNotifiy(false, "shutting down")
+runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
// First shut down the server gracefully (waiting for all ongoing requests)
server.Shutdown(context.Background())
@@ -425,7 +416,7 @@
for range time.Tick(30 * time.Minute) {
err := jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
if err != nil {
-log.Errorf("error while looking for jobs exceeding theire walltime: %s", err.Error())
+log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
}
runtime.GC()
}

@@ -40,15 +40,15 @@ Start by creating a base folder for all of the following steps.
- `cd ../..`
* Build Go Executable
- `go build ./cmd/cc-backend/`
-* Prepare Datafolder and Database file
-- `mkdir var`
-- `touch var/job.db`
* Activate & Config environment for cc-backend
- `cp configs/env-template.txt .env`
- Optional: Have a look via `vim ./.env`
- Copy the `config.json` file included in this tarball into the root directory of cc-backend: `cp ../../config.json ./`
* Back to toplevel `clustercockpit`
- `cd ..`
+* Prepare Datafolder and Database file
+- `mkdir var`
+- `./cc-backend --migrate-db`
### Setup cc-metric-store
* Clone Repository

@@ -1,5 +1,5 @@
{
-"addr": "0.0.0.0:8080",
+"addr": "127.0.0.1:8080",
"archive": {
"kind": "file",
"path": "./var/job-archive"

docs/searchbar.md (new file, 42 lines)
## Docs for ClusterCockpit Searchbar
### Usage
* Search tags are implemented as `type:<query>` search strings
  * Types `jobId, jobName, projectId, username, name` for roles `admin` and `support`
    * `jobName` is the job name as persisted in the `job.meta_data` table column
    * `username` is the actual account identifier as persisted in the `job.user` table column
    * `name` is the account owner's name as persisted in the `user.name` table column
  * Types `jobId, jobName` for role `user`
  * Examples:
    * `jobName:myJob12`
    * `jobId:123456`
    * `username:abcd100`
    * `name:Paul`
* If no search tag is used: best-guess search with the following hierarchy
  * `jobId -> username -> name -> projectId -> jobName`
* Destinations:
  * JobId: always the job table (allows multiple identical matches, e.g. jobIds from different clusters)
  * JobName: always the job table (allows multiple identical matches, e.g. jobNames from different clusters)
  * ProjectId: always the job table
  * Username
    * If *one* match is found: opens the detailed user view (`/monitoring/user/$USER`)
    * If *multiple* matches are found: opens the user table with the matches listed (`/monitoring/users/`)
    * **Please note**: Only users with jobs will be shown in the table! I.e., "multiple matches" can still be only one entry in the table.
  * Name
    * If *one* matching username is found: opens the detailed user view (`/monitoring/user/$USER`)
    * If *multiple* usernames are found: opens the user table with the matches listed (`/monitoring/users/`)
    * **Please note**: Only users with jobs will be shown in the table! I.e., "multiple matches" can still be only one entry in the table.
  * Best-guess search always redirects to the job table or `/monitoring/user/$USER` (first username match)
* Simple HTML error if ...
  * the best-guess search fails -> 'Not Found'
  * the query `type` is unknown
  * there are more than two colons in the string -> 'malformed'
* Spaces are trimmed (both for search tag and query string)
  * ` job12` == `job12`
  * `projectID : abcd ` == `projectId:abcd`
* `jobName` and `name` queries match on a part of the target string
  * `jobName:myjob` for jobName "myjob_cluster1"
  * `name:Paul` for name "Paul Atreides"
* The jobName GraphQL query is resolved by matching the query against the whole metaData JSON in the SQL DB.
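To make the parsing rules above concrete, here is a small, illustrative sketch of splitting a `type:<query>` search string. The real logic lives in `routerConfig.HandleSearchBar` (see the `main.go` hunk earlier in this diff) and may differ in details such as case handling, the exact colon limit, and error wording:

```go
// Illustrative sketch of splitting a "type:<query>" searchbar string.
package main

import (
	"errors"
	"fmt"
	"strings"
)

// searchTypes lists the search tags documented above, compared case-insensitively
// so that `projectID : abcd ` is treated like `projectId:abcd`.
var searchTypes = map[string]bool{
	"jobid": true, "jobname": true, "projectid": true, "username": true, "name": true,
}

// splitSearch returns the search tag (empty means: fall back to the best-guess
// hierarchy jobId -> username -> name -> projectId -> jobName) and the query.
func splitSearch(raw string) (tag, query string, err error) {
	parts := strings.Split(strings.TrimSpace(raw), ":")
	switch len(parts) {
	case 1: // no search tag used
		return "", strings.TrimSpace(parts[0]), nil
	case 2:
		tag = strings.ToLower(strings.TrimSpace(parts[0]))
		if !searchTypes[tag] {
			return "", "", fmt.Errorf("unknown search type: %s", tag)
		}
		return tag, strings.TrimSpace(parts[1]), nil
	default: // too many colons -> treated as malformed
		return "", "", errors.New("malformed search string")
	}
}

func main() {
	for _, s := range []string{"jobId:123456", " name : Paul ", "job12", "a:b:c:d"} {
		tag, query, err := splitSearch(s)
		fmt.Printf("%-14q -> tag=%q query=%q err=%v\n", s, tag, query, err)
	}
}
```

Running the sketch prints the extracted tag/query pairs (or errors) for the example inputs listed in this document.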

go.mod

@@ -1,6 +1,6 @@
module github.com/ClusterCockpit/cc-backend
-go 1.17
+go 1.18
require (
github.com/99designs/gqlgen v0.17.16
@@ -39,10 +39,13 @@ require (
github.com/go-openapi/jsonreference v0.20.0 // indirect
github.com/go-openapi/spec v0.20.7 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
+github.com/golang-migrate/migrate/v4 v4.15.2 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/gorilla/securecookie v1.1.1 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
+github.com/hashicorp/errwrap v1.1.0 // indirect
+github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/golang-lru v0.5.4 // indirect
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
github.com/josharian/intern v1.0.0 // indirect
@@ -51,7 +54,7 @@ require (
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
-github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
+github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
@@ -59,17 +62,20 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
+github.com/qustavo/sqlhooks/v2 v2.1.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/swaggo/files v0.0.0-20220728132757-551d4a08d97a // indirect
github.com/urfave/cli/v2 v2.8.1 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
+go.uber.org/atomic v1.7.0 // indirect
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
golang.org/x/net v0.0.0-20220909164309-bea034e7d591 // indirect
golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b // indirect
golang.org/x/sys v0.0.0-20220913175220-63ea55921009 // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/tools v0.1.12 // indirect
-google.golang.org/appengine v1.6.6 // indirect
+golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect
+google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.28.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect

go.sum (1284 changed lines): file diff suppressed because it is too large.

@@ -135,7 +135,7 @@ type ApiTag struct {
type TagJobApiRequest []*ApiTag
func handleError(err error, statusCode int, rw http.ResponseWriter) {
-log.Warnf("REST API: %s", err.Error())
+log.Warnf("REST ERROR : %s", err.Error())
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(statusCode)
json.NewEncoder(rw).Encode(ErrorResponse{
@@ -170,7 +170,7 @@ func decode(r io.Reader, val interface{}) error {
// @router /jobs/ [get]
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
-handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
+handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -301,7 +301,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/tag_job/{id} [post]
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
-handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
+handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -366,7 +366,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/start_job/ [post]
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
-handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
+handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -447,7 +447,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/stop_job/{id} [post]
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
-handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
+handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -500,7 +500,7 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/stop_job/ [post]
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
-handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
+handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -546,7 +546,7 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/delete_job/{id} [delete]
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
-handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
+handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -594,7 +594,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/delete_job/ [delete]
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
-handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
+handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -650,7 +650,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
// @router /jobs/delete_job_before/{ts} [delete]
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
-handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
+handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -725,7 +725,7 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
// func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
// if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
-// handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
+// handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
// return
// }
@@ -794,7 +794,7 @@ func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
me := auth.GetUser(r.Context())
if !me.HasRole(auth.RoleAdmin) {
if username != me.Username {
-http.Error(rw, "only admins are allowed to sign JWTs not for themselves", http.StatusForbidden)
+http.Error(rw, "Only admins are allowed to sign JWTs not for themselves", http.StatusForbidden)
return
}
}
@@ -819,13 +819,13 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
rw.Header().Set("Content-Type", "text/plain")
me := auth.GetUser(r.Context())
if !me.HasRole(auth.RoleAdmin) {
-http.Error(rw, "only admins are allowed to create new users", http.StatusForbidden)
+http.Error(rw, "Only admins are allowed to create new users", http.StatusForbidden)
return
}
username, password, role, name, email, project := r.FormValue("username"), r.FormValue("password"), r.FormValue("role"), r.FormValue("name"), r.FormValue("email"), r.FormValue("project")
if len(password) == 0 && role != auth.RoleApi {
-http.Error(rw, "only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
+http.Error(rw, "Only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
return
}
@@ -848,12 +848,12 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
return
}
-rw.Write([]byte(fmt.Sprintf("User %#v successfully created!\n", username)))
+rw.Write([]byte(fmt.Sprintf("User %v successfully created!\n", username)))
}
func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
-http.Error(rw, "only admins are allowed to delete a user", http.StatusForbidden)
+http.Error(rw, "Only admins are allowed to delete a user", http.StatusForbidden)
return
}
@@ -868,7 +868,7 @@ func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
-http.Error(rw, "only admins are allowed to fetch a list of users", http.StatusForbidden)
+http.Error(rw, "Only admins are allowed to fetch a list of users", http.StatusForbidden)
return
}
@@ -899,7 +899,7 @@ func (api *RestApi) getRoles(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
-http.Error(rw, "only admins are allowed to update a user", http.StatusForbidden)
+http.Error(rw, "Only admins are allowed to update a user", http.StatusForbidden)
return
}
@@ -943,7 +943,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
rw.Header().Set("Content-Type", "text/plain")
key, value := r.FormValue("key"), r.FormValue("value")
-fmt.Printf("KEY: %#v\nVALUE: %#v\n", key, value)
+fmt.Printf("REST > KEY: %#v\nVALUE: %#v\n", key, value)
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, auth.GetUser(r.Context())); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
@@ -955,7 +955,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
if api.MachineStateDir == "" {
-http.Error(rw, "not enabled", http.StatusNotFound)
+http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
return
}
@@ -986,7 +986,7 @@ func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) getMachineState(rw http.ResponseWriter, r *http.Request) {
if api.MachineStateDir == "" {
-http.Error(rw, "not enabled", http.StatusNotFound)
+http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
return
}

@@ -185,30 +185,20 @@ func Init(db *sqlx.DB,
configs map[string]interface{}) (*Authentication, error) {
auth := &Authentication{}
auth.db = db
-_, err := db.Exec(`
-CREATE TABLE IF NOT EXISTS user (
-username varchar(255) PRIMARY KEY NOT NULL,
-password varchar(255) DEFAULT NULL,
-ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
-name varchar(255) DEFAULT NULL,
-roles varchar(255) NOT NULL DEFAULT "[]",
-email varchar(255) DEFAULT NULL,
-projects varchar(255) NOT NULL DEFAULT "[]");`)
-if err != nil {
-return nil, err
-}
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
+log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
+log.Error("Error while initializing authentication -> decoding session key failed")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
@@ -216,12 +206,14 @@ func Init(db *sqlx.DB,
auth.LocalAuth = &LocalAuthenticator{}
if err := auth.LocalAuth.Init(auth, nil); err != nil {
+log.Error("Error while initializing authentication -> localAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
auth.JwtAuth = &JWTAuthenticator{}
if err := auth.JwtAuth.Init(auth, configs["jwt"]); err != nil {
+log.Error("Error while initializing authentication -> jwtAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.JwtAuth)
@@ -229,6 +221,7 @@ func Init(db *sqlx.DB,
if config, ok := configs["ldap"]; ok {
auth.LdapAuth = &LdapAuthenticator{}
if err := auth.LdapAuth.Init(auth, config); err != nil {
+log.Error("Error while initializing authentication -> ldapAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
@@ -243,6 +236,7 @@ func (auth *Authentication) AuthViaSession(
session, err := auth.sessionStore.Get(r, "session")
if err != nil {
+log.Error("Error while getting session store")
return nil, err
}
@@ -272,7 +266,7 @@ func (auth *Authentication) Login(
user := (*User)(nil)
if username != "" {
if user, _ = auth.GetUser(username); err != nil {
-// log.Warnf("login of unkown user %#v", username)
+// log.Warnf("login of unkown user %v", username)
_ = err
}
}
@@ -284,7 +278,7 @@ func (auth *Authentication) Login(
user, err = authenticator.Login(user, rw, r)
if err != nil {
-log.Warnf("login failed: %s", err.Error())
+log.Warnf("user '%s' login failed: %s", user.Username, err.Error())
onfailure(rw, r, err)
return
}
@@ -303,7 +297,7 @@ func (auth *Authentication) Login(
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
if err := auth.sessionStore.Save(r, rw, session); err != nil {
-log.Errorf("session save failed: %s", err.Error())
+log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}

@@ -45,11 +45,13 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
} else {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
+log.Warn("Could not decode JWT public key")
return err
}
ja.publicKey = ed25519.PublicKey(bytes)
bytes, err = base64.StdEncoding.DecodeString(privKey)
if err != nil {
+log.Warn("Could not decode JWT private key")
return err
}
ja.privateKey = ed25519.PrivateKey(bytes)
@@ -58,6 +60,7 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
if pubKey = os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
+log.Warn("Could not decode cross login JWT HS512 key")
return err
}
ja.loginTokenKey = bytes
@@ -68,6 +71,7 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
if keyFound && pubKeyCrossLogin != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
if err != nil {
+log.Warn("Could not decode cross login JWT public key")
return err
}
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
@@ -123,13 +127,15 @@ func (ja *JWTAuthenticator) Login(
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
return ja.loginTokenKey, nil
}
-return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
+return nil, fmt.Errorf("AUTH/JWT > unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
})
if err != nil {
+log.Warn("Error while parsing jwt token")
return nil, err
}
if err := token.Claims.Valid(); err != nil {
+log.Warn("jwt token claims are not valid")
return nil, err
}
@@ -151,6 +157,7 @@ func (ja *JWTAuthenticator) Login(
if user == nil {
user, err = ja.auth.GetUser(sub)
if err != nil && err != sql.ErrNoRows {
+log.Errorf("Error while loading user '%v'", sub)
return nil, err
} else if user == nil {
user = &User{
@@ -159,6 +166,7 @@ func (ja *JWTAuthenticator) Login(
AuthSource: AuthViaToken,
}
if err := ja.auth.AddUser(user); err != nil {
+log.Errorf("Error while adding user '%v' to auth from token", user.Username)
return nil, err
}
}
@@ -223,11 +231,13 @@ func (ja *JWTAuthenticator) Auth(
return ja.publicKey, nil
})
if err != nil {
+log.Warn("Error while parsing token")
return nil, err
}
// Check token validity
if err := token.Claims.Valid(); err != nil {
+log.Warn("jwt token claims are not valid")
return nil, err
}
@@ -276,7 +286,7 @@ func (ja *JWTAuthenticator) Auth(
session.Values["roles"] = roles
if err := ja.auth.sessionStore.Save(r, rw, session); err != nil {
-log.Errorf("session save failed: %s", err.Error())
+log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return nil, err
}

@@ -39,21 +39,23 @@ func (la *LdapAuthenticator) Init(
if la.config != nil && la.config.SyncInterval != "" {
interval, err := time.ParseDuration(la.config.SyncInterval)
if err != nil {
+log.Warnf("Could not parse duration for sync interval: %v", la.config.SyncInterval)
return err
}
if interval == 0 {
+log.Info("Sync interval is zero")
return nil
}
go func() {
ticker := time.NewTicker(interval)
for t := range ticker.C {
-log.Printf("LDAP sync started at %s", t.Format(time.RFC3339))
+log.Printf("sync started at %s", t.Format(time.RFC3339))
if err := la.Sync(); err != nil {
-log.Errorf("LDAP sync failed: %s", err.Error())
+log.Errorf("sync failed: %s", err.Error())
}
-log.Print("LDAP sync done")
+log.Print("sync done")
}
}()
}
@@ -76,12 +78,14 @@ func (la *LdapAuthenticator) Login(
l, err := la.getLdapConnection(false)
if err != nil {
+log.Warn("Error while getting ldap connection")
return nil, err
}
defer l.Close()
userDn := strings.Replace(la.config.UserBind, "{username}", user.Username, -1)
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
+log.Error("Error while binding to ldap connection")
return nil, err
}
@@ -104,12 +108,14 @@ func (la *LdapAuthenticator) Sync() error {
users := map[string]int{}
rows, err := la.auth.db.Query(`SELECT username FROM user WHERE user.ldap = 1`)
if err != nil {
+log.Warn("Error while querying LDAP users")
return err
}
for rows.Next() {
var username string
if err := rows.Scan(&username); err != nil {
+log.Warnf("Error while scanning for user '%s'", username)
return err
}
@@ -118,6 +124,7 @@
l, err := la.getLdapConnection(true)
if err != nil {
+log.Error("LDAP connection error")
return err
}
defer l.Close()
@@ -126,6 +133,7 @@
la.config.UserBase, ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
la.config.UserFilter, []string{"dn", "uid", "gecos"}, nil))
if err != nil {
+log.Warn("LDAP search error")
return err
}
@@ -147,15 +155,17 @@
for username, where := range users {
if where == IN_DB && la.config.SyncDelOldUsers {
-log.Debugf("ldap-sync: remove %#v (does not show up in LDAP anymore)", username)
+log.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
if _, err := la.auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username); err != nil {
+log.Errorf("User '%s' not in LDAP anymore: Delete from DB failed", username)
return err
}
} else if where == IN_LDAP {
name := newnames[username]
-log.Debugf("ldap-sync: add %#v (name: %#v, roles: [user], ldap: true)", username, name)
+log.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
if _, err := la.auth.db.Exec(`INSERT INTO user (username, ldap, name, roles) VALUES (?, ?, ?, ?)`,
username, 1, name, "[\""+RoleUser+"\"]"); err != nil {
+log.Errorf("User '%s' new in LDAP: Insert into DB failed", username)
return err
}
}
@@ -170,12 +180,14 @@ func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
conn, err := ldap.DialURL(la.config.Url)
if err != nil {
+log.Warn("LDAP URL dial failed")
return nil, err
}
if admin {
if err := conn.Bind(la.config.SearchDN, la.syncPassword); err != nil {
conn.Close()
+log.Warn("LDAP connection bind failed")
return nil, err
}
}

@@ -39,7 +39,7 @@ func (la *LocalAuthenticator) Login(
r *http.Request) (*User, error) {
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil {
-return nil, fmt.Errorf("user '%s' provided the wrong password (%w)", user.Username, e)
+return nil, fmt.Errorf("AUTH/LOCAL > user '%s' provided the wrong password (%w)", user.Username, e)
}
return user, nil

View File

@ -25,6 +25,7 @@ func (auth *Authentication) GetUser(username string) (*User, error) {
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user"). if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
Where("user.username = ?", username).RunWith(auth.db). Where("user.username = ?", username).RunWith(auth.db).
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil { QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
log.Warnf("Error while querying user '%v' from database", username)
return nil, err return nil, err
} }
@ -33,6 +34,7 @@ func (auth *Authentication) GetUser(username string) (*User, error) {
user.Email = email.String user.Email = email.String
if rawRoles.Valid { if rawRoles.Valid {
if err := json.Unmarshal([]byte(rawRoles.String), &user.Roles); err != nil { if err := json.Unmarshal([]byte(rawRoles.String), &user.Roles); err != nil {
log.Warn("Error while unmarshaling raw roles from DB")
return nil, err return nil, err
} }
} }
@ -64,6 +66,7 @@ func (auth *Authentication) AddUser(user *User) error {
if user.Password != "" { if user.Password != "" {
password, err := bcrypt.GenerateFromPassword([]byte(user.Password), bcrypt.DefaultCost) password, err := bcrypt.GenerateFromPassword([]byte(user.Password), bcrypt.DefaultCost)
if err != nil { if err != nil {
log.Error("Error while encrypting new user password")
return err return err
} }
cols = append(cols, "password") cols = append(cols, "password")
@ -71,6 +74,7 @@ func (auth *Authentication) AddUser(user *User) error {
} }
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(auth.db).Exec(); err != nil { if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
return err return err
} }
@ -81,6 +85,7 @@ func (auth *Authentication) AddUser(user *User) error {
func (auth *Authentication) DelUser(username string) error { func (auth *Authentication) DelUser(username string) error {
_, err := auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username) _, err := auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username)
log.Errorf("Error while deleting user '%s' from DB", username)
return err return err
} }
@ -93,6 +98,7 @@ func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
rows, err := q.RunWith(auth.db).Query() rows, err := q.RunWith(auth.db).Query()
if err != nil { if err != nil {
log.Warn("Error while querying user list")
return nil, err return nil, err
} }
@ -104,10 +110,12 @@ func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
user := &User{} user := &User{}
var name, email sql.NullString var name, email sql.NullString
if err := rows.Scan(&user.Username, &name, &email, &rawroles, &rawprojects); err != nil { if err := rows.Scan(&user.Username, &name, &email, &rawroles, &rawprojects); err != nil {
log.Warn("Error while scanning user list")
return nil, err return nil, err
} }
if err := json.Unmarshal([]byte(rawroles), &user.Roles); err != nil { if err := json.Unmarshal([]byte(rawroles), &user.Roles); err != nil {
log.Warn("Error while unmarshaling raw role list")
return nil, err return nil, err
} }
@ -129,11 +137,12 @@ func (auth *Authentication) AddRole(
user, err := auth.GetUser(username) user, err := auth.GetUser(username)
if err != nil { if err != nil {
log.Warnf("Could not load user '%s'", username)
return err return err
} }
if !IsValidRole(role) { if !IsValidRole(role) {
return fmt.Errorf("invalid user role: %#v", role) return fmt.Errorf("Invalid user role: %v", role)
} }
if user.HasRole(role) { if user.HasRole(role) {
@ -142,6 +151,7 @@ func (auth *Authentication) AddRole(
roles, _ := json.Marshal(append(user.Roles, role)) roles, _ := json.Marshal(append(user.Roles, role))
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil { if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while adding new role for user '%s'", user.Username)
return err return err
} }
return nil return nil
@ -150,11 +160,12 @@ func (auth *Authentication) AddRole(
func (auth *Authentication) RemoveRole(ctx context.Context, username string, role string) error { func (auth *Authentication) RemoveRole(ctx context.Context, username string, role string) error {
user, err := auth.GetUser(username) user, err := auth.GetUser(username)
if err != nil { if err != nil {
log.Warnf("Could not load user '%s'", username)
return err return err
} }
if !IsValidRole(role) { if !IsValidRole(role) {
return fmt.Errorf("invalid user role: %#v", role) return fmt.Errorf("Invalid user role: %#v", role)
} }
if role == RoleManager && len(user.Projects) != 0 { if role == RoleManager && len(user.Projects) != 0 {
@ -174,11 +185,12 @@ func (auth *Authentication) RemoveRole(ctx context.Context, username string, rol
if exists == true { if exists == true {
var mroles, _ = json.Marshal(newroles) var mroles, _ = json.Marshal(newroles)
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil { if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while removing role for user '%s'", user.Username)
return err return err
} }
return nil return nil
} else { } else {
return fmt.Errorf("user %#v already does not have role %#v", username, role) return fmt.Errorf("User '%v' already does not have role: %v", username, role)
} }
} }
@ -259,9 +271,13 @@ func FetchUser(ctx context.Context, db *sqlx.DB, username string) (*model.User,
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username). if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
RunWith(db).QueryRow().Scan(&name, &email); err != nil { RunWith(db).QueryRow().Scan(&name, &email); err != nil {
if err == sql.ErrNoRows { if err == sql.ErrNoRows {
/* This warning would be logged *often* for non-local users, i.e. users that only appear in the job table or the archive, */
/* because FetchUser is called to retrieve the full name and e-mail address for every job in a query/list. */
// log.Warnf("User '%s' Not found in DB", username)
return nil, nil return nil, nil
} }
log.Warnf("Error while fetching user '%s'", username)
return nil, err return nil, err
} }

View File

@ -49,7 +49,7 @@ func Init(flagConfigFile string) {
raw, err := os.ReadFile(flagConfigFile) raw, err := os.ReadFile(flagConfigFile)
if err != nil { if err != nil {
if !os.IsNotExist(err) { if !os.IsNotExist(err) {
log.Fatal(err) log.Fatalf("CONFIG ERROR: %v", err)
} }
} else { } else {
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil { if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
@ -58,7 +58,7 @@ func Init(flagConfigFile string) {
dec := json.NewDecoder(bytes.NewReader(raw)) dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields() dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil { if err := dec.Decode(&Keys); err != nil {
log.Fatal(err) log.Fatalf("could not decode: %v", err)
} }
if Keys.Clusters == nil || len(Keys.Clusters) < 1 { if Keys.Clusters == nil || len(Keys.Clusters) < 1 {
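
Note: the strict decoding pattern used above (schema validation, then DisallowUnknownFields) can be reproduced in isolation. A minimal, self-contained sketch with a hypothetical config struct and file name:

package main

import (
	"bytes"
	"encoding/json"
	"log"
	"os"
)

type exampleConfig struct {
	Addr string `json:"addr"`
}

func main() {
	raw, err := os.ReadFile("config.json") // hypothetical path
	if err != nil {
		log.Fatalf("CONFIG ERROR: %v", err)
	}
	// Unknown JSON keys become an error instead of being silently dropped.
	dec := json.NewDecoder(bytes.NewReader(raw))
	dec.DisallowUnknownFields()
	var cfg exampleConfig
	if err := dec.Decode(&cfg); err != nil {
		log.Fatalf("could not decode: %v", err)
	}
	log.Printf("addr: %s", cfg.Addr)
}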

View File

@ -19,7 +19,7 @@ func TestInit(t *testing.T) {
func TestInitMinimal(t *testing.T) { func TestInitMinimal(t *testing.T) {
fp := "../../docs/config.json" fp := "../../docs/config.json"
Init(fp) Init(fp)
if Keys.Addr != "0.0.0.0:8080" { if Keys.Addr != "127.0.0.1:8080" {
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:8080", Keys.Addr) t.Errorf("wrong addr\ngot: %s \nwant: 127.0.0.1:8080", Keys.Addr)
} }
} }

View File

@ -88,6 +88,7 @@ type ComplexityRoot struct {
Exclusive func(childComplexity int) int Exclusive func(childComplexity int) int
ID func(childComplexity int) int ID func(childComplexity int) int
JobID func(childComplexity int) int JobID func(childComplexity int) int
JobName func(childComplexity int) int
MetaData func(childComplexity int) int MetaData func(childComplexity int) int
MonitoringStatus func(childComplexity int) int MonitoringStatus func(childComplexity int) int
NumAcc func(childComplexity int) int NumAcc func(childComplexity int) int
@ -130,6 +131,7 @@ type ComplexityRoot struct {
HistDuration func(childComplexity int) int HistDuration func(childComplexity int) int
HistNumNodes func(childComplexity int) int HistNumNodes func(childComplexity int) int
ID func(childComplexity int) int ID func(childComplexity int) int
Name func(childComplexity int) int
ShortJobs func(childComplexity int) int ShortJobs func(childComplexity int) int
TotalCoreHours func(childComplexity int) int TotalCoreHours func(childComplexity int) int
TotalJobs func(childComplexity int) int TotalJobs func(childComplexity int) int
@ -263,6 +265,8 @@ type ClusterResolver interface {
Partitions(ctx context.Context, obj *schema.Cluster) ([]string, error) Partitions(ctx context.Context, obj *schema.Cluster) ([]string, error)
} }
type JobResolver interface { type JobResolver interface {
JobName(ctx context.Context, obj *schema.Job) (*string, error)
Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)
MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error)
@ -452,6 +456,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Job.JobID(childComplexity), true return e.complexity.Job.JobID(childComplexity), true
case "Job.jobName":
if e.complexity.Job.JobName == nil {
break
}
return e.complexity.Job.JobName(childComplexity), true
case "Job.metaData": case "Job.metaData":
if e.complexity.Job.MetaData == nil { if e.complexity.Job.MetaData == nil {
break break
@ -662,6 +673,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.JobsStatistics.ID(childComplexity), true return e.complexity.JobsStatistics.ID(childComplexity), true
case "JobsStatistics.name":
if e.complexity.JobsStatistics.Name == nil {
break
}
return e.complexity.JobsStatistics.Name(childComplexity), true
case "JobsStatistics.shortJobs": case "JobsStatistics.shortJobs":
if e.complexity.JobsStatistics.ShortJobs == nil { if e.complexity.JobsStatistics.ShortJobs == nil {
break break
@ -1391,6 +1409,7 @@ type Job {
jobId: Int! jobId: Int!
user: String! user: String!
project: String! project: String!
jobName: String
cluster: String! cluster: String!
subCluster: String! subCluster: String!
startTime: Time! startTime: Time!
@ -1578,14 +1597,15 @@ type IntRangeOutput { from: Int!, to: Int! }
type TimeRangeOutput { from: Time!, to: Time! } type TimeRangeOutput { from: Time!, to: Time! }
input JobFilter { input JobFilter {
tags: [ID!] tags: [ID!]
jobId: StringInput jobId: StringInput
arrayJobId: Int arrayJobId: Int
user: StringInput user: StringInput
project: StringInput project: StringInput
cluster: StringInput jobName: StringInput
partition: StringInput cluster: StringInput
duration: IntRange partition: StringInput
duration: IntRange
minRunningFor: Int minRunningFor: Int
@ -1616,6 +1636,7 @@ input StringInput {
contains: String contains: String
startsWith: String startsWith: String
endsWith: String endsWith: String
in: [String!]
} }
input IntRange { from: Int!, to: Int! } input IntRange { from: Int!, to: Int! }
@ -1636,6 +1657,7 @@ type HistoPoint {
type JobsStatistics { type JobsStatistics {
id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster
name: String # if User-Statistics: Given Name of Account (ID) Owner
totalJobs: Int! # Number of jobs that matched totalJobs: Int! # Number of jobs that matched
shortJobs: Int! # Number of jobs with a duration of less than 2 minutes shortJobs: Int! # Number of jobs with a duration of less than 2 minutes
totalWalltime: Int! # Sum of the duration of all matched jobs in hours totalWalltime: Int! # Sum of the duration of all matched jobs in hours
@ -3038,6 +3060,47 @@ func (ec *executionContext) fieldContext_Job_project(ctx context.Context, field
return fc, nil return fc, nil
} }
func (ec *executionContext) _Job_jobName(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_jobName(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Job().JobName(rctx, obj)
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(*string)
fc.Result = res
return ec.marshalOString2ᚖstring(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Job_jobName(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Job",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type String does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _Job_cluster(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { func (ec *executionContext) _Job_cluster(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_cluster(ctx, field) fc, err := ec.fieldContext_Job_cluster(ctx, field)
if err != nil { if err != nil {
@ -4231,6 +4294,8 @@ func (ec *executionContext) fieldContext_JobResultList_items(ctx context.Context
return ec.fieldContext_Job_user(ctx, field) return ec.fieldContext_Job_user(ctx, field)
case "project": case "project":
return ec.fieldContext_Job_project(ctx, field) return ec.fieldContext_Job_project(ctx, field)
case "jobName":
return ec.fieldContext_Job_jobName(ctx, field)
case "cluster": case "cluster":
return ec.fieldContext_Job_cluster(ctx, field) return ec.fieldContext_Job_cluster(ctx, field)
case "subCluster": case "subCluster":
@ -4441,6 +4506,47 @@ func (ec *executionContext) fieldContext_JobsStatistics_id(ctx context.Context,
return fc, nil return fc, nil
} }
func (ec *executionContext) _JobsStatistics_name(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_JobsStatistics_name(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return obj.Name, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(*string)
fc.Result = res
return ec.marshalOString2ᚖstring(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_JobsStatistics_name(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "JobsStatistics",
Field: field,
IsMethod: false,
IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type String does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _JobsStatistics_totalJobs(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) { func (ec *executionContext) _JobsStatistics_totalJobs(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_JobsStatistics_totalJobs(ctx, field) fc, err := ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
if err != nil { if err != nil {
@ -6078,6 +6184,8 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
return ec.fieldContext_Job_user(ctx, field) return ec.fieldContext_Job_user(ctx, field)
case "project": case "project":
return ec.fieldContext_Job_project(ctx, field) return ec.fieldContext_Job_project(ctx, field)
case "jobName":
return ec.fieldContext_Job_jobName(ctx, field)
case "cluster": case "cluster":
return ec.fieldContext_Job_cluster(ctx, field) return ec.fieldContext_Job_cluster(ctx, field)
case "subCluster": case "subCluster":
@ -6357,6 +6465,8 @@ func (ec *executionContext) fieldContext_Query_jobsStatistics(ctx context.Contex
switch field.Name { switch field.Name {
case "id": case "id":
return ec.fieldContext_JobsStatistics_id(ctx, field) return ec.fieldContext_JobsStatistics_id(ctx, field)
case "name":
return ec.fieldContext_JobsStatistics_name(ctx, field)
case "totalJobs": case "totalJobs":
return ec.fieldContext_JobsStatistics_totalJobs(ctx, field) return ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
case "shortJobs": case "shortJobs":
@ -10389,7 +10499,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj int
asMap[k] = v asMap[k] = v
} }
fieldsInOrder := [...]string{"tags", "jobId", "arrayJobId", "user", "project", "cluster", "partition", "duration", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "flopsAnyAvg", "memBwAvg", "loadAvg", "memUsedMax"} fieldsInOrder := [...]string{"tags", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "flopsAnyAvg", "memBwAvg", "loadAvg", "memUsedMax"}
for _, k := range fieldsInOrder { for _, k := range fieldsInOrder {
v, ok := asMap[k] v, ok := asMap[k]
if !ok { if !ok {
@ -10436,6 +10546,14 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj int
if err != nil { if err != nil {
return it, err return it, err
} }
case "jobName":
var err error
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("jobName"))
it.JobName, err = ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
if err != nil {
return it, err
}
case "cluster": case "cluster":
var err error var err error
@ -10629,7 +10747,7 @@ func (ec *executionContext) unmarshalInputStringInput(ctx context.Context, obj i
asMap[k] = v asMap[k] = v
} }
fieldsInOrder := [...]string{"eq", "contains", "startsWith", "endsWith"} fieldsInOrder := [...]string{"eq", "contains", "startsWith", "endsWith", "in"}
for _, k := range fieldsInOrder { for _, k := range fieldsInOrder {
v, ok := asMap[k] v, ok := asMap[k]
if !ok { if !ok {
@ -10668,6 +10786,14 @@ func (ec *executionContext) unmarshalInputStringInput(ctx context.Context, obj i
if err != nil { if err != nil {
return it, err return it, err
} }
case "in":
var err error
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("in"))
it.In, err = ec.unmarshalOString2ᚕstringᚄ(ctx, v)
if err != nil {
return it, err
}
} }
} }
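
Note: the new `in` operator on StringInput accepts a list of values. A hedged sketch of how such a filter could map onto a SQL IN clause with the squirrel builder already used in this code base; the actual BuildWhereClause implementation may differ:

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

// buildStringCondition is a hypothetical helper, not the real BuildWhereClause.
func buildStringCondition(col string, in []string, q sq.SelectBuilder) sq.SelectBuilder {
	if len(in) > 0 {
		// sq.Eq with a slice value renders as "col IN (?,?,...)".
		return q.Where(sq.Eq{col: in})
	}
	return q
}

func main() {
	q := sq.Select("job.id").From("job")
	q = buildStringCondition("job.user", []string{"alice", "bob"}, q)
	sql, args, _ := q.ToSql()
	fmt.Println(sql, args) // SELECT job.id FROM job WHERE job.user IN (?,?) [alice bob]
}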
@ -11000,6 +11126,23 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
if out.Values[i] == graphql.Null { if out.Values[i] == graphql.Null {
atomic.AddUint32(&invalids, 1) atomic.AddUint32(&invalids, 1)
} }
case "jobName":
field := field
innerFunc := func(ctx context.Context) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Job_jobName(ctx, field, obj)
return res
}
out.Concurrently(i, func() graphql.Marshaler {
return innerFunc(ctx)
})
case "cluster": case "cluster":
out.Values[i] = ec._Job_cluster(ctx, field, obj) out.Values[i] = ec._Job_cluster(ctx, field, obj)
@ -11312,6 +11455,10 @@ func (ec *executionContext) _JobsStatistics(ctx context.Context, sel ast.Selecti
if out.Values[i] == graphql.Null { if out.Values[i] == graphql.Null {
invalids++ invalids++
} }
case "name":
out.Values[i] = ec._JobsStatistics_name(ctx, field, obj)
case "totalJobs": case "totalJobs":
out.Values[i] = ec._JobsStatistics_totalJobs(ctx, field, obj) out.Values[i] = ec._JobsStatistics_totalJobs(ctx, field, obj)

View File

@ -42,6 +42,7 @@ type JobFilter struct {
ArrayJobID *int `json:"arrayJobId"` ArrayJobID *int `json:"arrayJobId"`
User *StringInput `json:"user"` User *StringInput `json:"user"`
Project *StringInput `json:"project"` Project *StringInput `json:"project"`
JobName *StringInput `json:"jobName"`
Cluster *StringInput `json:"cluster"` Cluster *StringInput `json:"cluster"`
Partition *StringInput `json:"partition"` Partition *StringInput `json:"partition"`
Duration *schema.IntRange `json:"duration"` Duration *schema.IntRange `json:"duration"`
@ -71,6 +72,7 @@ type JobResultList struct {
type JobsStatistics struct { type JobsStatistics struct {
ID string `json:"id"` ID string `json:"id"`
Name *string `json:"name"`
TotalJobs int `json:"totalJobs"` TotalJobs int `json:"totalJobs"`
ShortJobs int `json:"shortJobs"` ShortJobs int `json:"shortJobs"`
TotalWalltime int `json:"totalWalltime"` TotalWalltime int `json:"totalWalltime"`
@ -101,10 +103,11 @@ type PageRequest struct {
} }
type StringInput struct { type StringInput struct {
Eq *string `json:"eq"` Eq *string `json:"eq"`
Contains *string `json:"contains"` Contains *string `json:"contains"`
StartsWith *string `json:"startsWith"` StartsWith *string `json:"startsWith"`
EndsWith *string `json:"endsWith"` EndsWith *string `json:"endsWith"`
In []string `json:"in"`
} }
type TimeRangeOutput struct { type TimeRangeOutput struct {

View File

@ -16,6 +16,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
) )
@ -24,6 +25,11 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
return r.Repo.Partitions(obj.Name) return r.Repo.Partitions(obj.Name)
} }
// JobName is the resolver for the jobName field.
func (r *jobResolver) JobName(ctx context.Context, obj *schema.Job) (*string, error) {
return r.Repo.FetchJobName(obj)
}
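
Note: FetchJobName itself lives in the repository layer and is not part of this hunk. A rough sketch of what such a lookup could look like, assuming the name is kept in the job's decoded meta_data map; the real implementation (and any caching) may differ:

package main

import "fmt"

// fetchJobName is a hypothetical stand-in for r.Repo.FetchJobName: it looks the
// name up in a job's decoded meta_data map (layout assumed).
func fetchJobName(metaData map[string]string) *string {
	if name, ok := metaData["jobName"]; ok && name != "" {
		return &name
	}
	return nil
}

func main() {
	md := map[string]string{"jobName": "lulesh_strong_scaling"} // hypothetical job
	if name := fetchJobName(md); name != nil {
		fmt.Println(*name)
	}
}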
// Tags is the resolver for the tags field. // Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) { func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(&obj.ID) return r.Repo.GetTags(&obj.ID)
@ -43,6 +49,7 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) { func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name) id, err := r.Repo.CreateTag(typeArg, name)
if err != nil { if err != nil {
log.Warn("Error while creating tag")
return nil, err return nil, err
} }
@ -58,6 +65,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) { func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
jid, err := strconv.ParseInt(job, 10, 64) jid, err := strconv.ParseInt(job, 10, 64)
if err != nil { if err != nil {
log.Warn("Error while adding tag to job")
return nil, err return nil, err
} }
@ -65,10 +73,12 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
for _, tagId := range tagIds { for _, tagId := range tagIds {
tid, err := strconv.ParseInt(tagId, 10, 64) tid, err := strconv.ParseInt(tagId, 10, 64)
if err != nil { if err != nil {
log.Warn("Error while parsing tag id")
return nil, err return nil, err
} }
if tags, err = r.Repo.AddTag(jid, tid); err != nil { if tags, err = r.Repo.AddTag(jid, tid); err != nil {
log.Warn("Error while adding tag")
return nil, err return nil, err
} }
} }
@ -80,6 +90,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) { func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
jid, err := strconv.ParseInt(job, 10, 64) jid, err := strconv.ParseInt(job, 10, 64)
if err != nil { if err != nil {
log.Warn("Error while parsing job id")
return nil, err return nil, err
} }
@ -87,10 +98,12 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
for _, tagId := range tagIds { for _, tagId := range tagIds {
tid, err := strconv.ParseInt(tagId, 10, 64) tid, err := strconv.ParseInt(tagId, 10, 64)
if err != nil { if err != nil {
log.Warn("Error while parsing tag id")
return nil, err return nil, err
} }
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil { if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
log.Warn("Error while removing tag")
return nil, err return nil, err
} }
} }
@ -101,6 +114,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
// UpdateConfiguration is the resolver for the updateConfiguration field. // UpdateConfiguration is the resolver for the updateConfiguration field.
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) { func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, auth.GetUser(ctx)); err != nil { if err := repository.GetUserCfgRepo().UpdateConfig(name, value, auth.GetUser(ctx)); err != nil {
log.Warn("Error while updating user config")
return nil, err return nil, err
} }
@ -126,6 +140,7 @@ func (r *queryResolver) User(ctx context.Context, username string) (*model.User,
func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) { func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) {
data, err := r.Repo.AllocatedNodes(cluster) data, err := r.Repo.AllocatedNodes(cluster)
if err != nil { if err != nil {
log.Warn("Error while fetching allocated nodes")
return nil, err return nil, err
} }
@ -144,11 +159,13 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) { func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
numericId, err := strconv.ParseInt(id, 10, 64) numericId, err := strconv.ParseInt(id, 10, 64)
if err != nil { if err != nil {
log.Warn("Error while parsing job id")
return nil, err return nil, err
} }
job, err := r.Repo.FindById(numericId) job, err := r.Repo.FindById(numericId)
if err != nil { if err != nil {
log.Warn("Error while finding job by id")
return nil, err return nil, err
} }
@ -163,11 +180,13 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) { func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
job, err := r.Query().Job(ctx, id) job, err := r.Query().Job(ctx, id)
if err != nil { if err != nil {
log.Warn("Error while querying job for metrics")
return nil, err return nil, err
} }
data, err := metricdata.LoadData(job, metrics, scopes, ctx) data, err := metricdata.LoadData(job, metrics, scopes, ctx)
if err != nil { if err != nil {
log.Warn("Error while loading job data")
return nil, err return nil, err
} }
@ -175,7 +194,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
for name, md := range data { for name, md := range data {
for scope, metric := range md { for scope, metric := range md {
if metric.Scope != schema.MetricScope(scope) { if metric.Scope != schema.MetricScope(scope) {
panic("WTF?") log.Panic("metric.Scope != schema.MetricScope(scope) : Should not happen!")
} }
res = append(res, &model.JobMetricWithName{ res = append(res, &model.JobMetricWithName{
@ -204,11 +223,13 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
jobs, err := r.Repo.QueryJobs(ctx, filter, page, order) jobs, err := r.Repo.QueryJobs(ctx, filter, page, order)
if err != nil { if err != nil {
log.Warn("Error while querying jobs")
return nil, err return nil, err
} }
count, err := r.Repo.CountJobs(ctx, filter) count, err := r.Repo.CountJobs(ctx, filter)
if err != nil { if err != nil {
log.Warn("Error while counting jobs")
return nil, err return nil, err
} }
@ -217,13 +238,14 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
// JobsStatistics is the resolver for the jobsStatistics field. // JobsStatistics is the resolver for the jobsStatistics field.
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) { func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
return r.jobsStatistics(ctx, filter, groupBy) return r.Repo.JobsStatistics(ctx, filter, groupBy)
} }
// JobsCount is the resolver for the jobsCount field. // JobsCount is the resolver for the jobsCount field.
func (r *queryResolver) JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, weight *model.Weights, limit *int) ([]*model.Count, error) { func (r *queryResolver) JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, weight *model.Weights, limit *int) ([]*model.Count, error) {
counts, err := r.Repo.CountGroupedJobs(ctx, groupBy, filter, weight, limit) counts, err := r.Repo.CountGroupedJobs(ctx, groupBy, filter, weight, limit)
if err != nil { if err != nil {
log.Warn("Error while counting grouped jobs")
return nil, err return nil, err
} }
@ -257,6 +279,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx) data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil { if err != nil {
log.Warn("Error while loading node data")
return nil, err return nil, err
} }

View File

@ -6,217 +6,16 @@ package graph
import ( import (
"context" "context"
"database/sql"
"errors" "errors"
"fmt" "fmt"
"math" "math"
"time"
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
) )
// GraphQL validation should make sure that no unknown values can be specified.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
}
const ShortJobDuration int = 5 * 60
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
// In case `groupBy` is nil (not used), the model.JobsStatistics used is at the key '' (empty string)
stats := map[string]*model.JobsStatistics{}
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
for _, cluster := range archive.Clusters {
for _, subcluster := range cluster.SubClusters {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", subcluster.SocketsPerNode, subcluster.CoresPerSocket)
var rawQuery sq.SelectBuilder
if groupBy == nil {
rawQuery = sq.Select(
"''",
"COUNT(job.id)",
"CAST(ROUND(SUM(job.duration) / 3600) as int)",
corehoursCol,
).From("job")
} else {
col := groupBy2column[*groupBy]
rawQuery = sq.Select(
col,
"COUNT(job.id)",
"CAST(ROUND(SUM(job.duration) / 3600) as int)",
corehoursCol,
).From("job").GroupBy(col)
}
rawQuery = rawQuery.
Where("job.cluster = ?", cluster.Name).
Where("job.subcluster = ?", subcluster.Name)
query, qerr := repository.SecurityCheck(ctx, rawQuery)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = repository.BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
return nil, err
}
for rows.Next() {
var id sql.NullString
var jobs, walltime, corehours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
return nil, err
}
if id.Valid {
if s, ok := stats[id.String]; ok {
s.TotalJobs += int(jobs.Int64)
s.TotalWalltime += int(walltime.Int64)
s.TotalCoreHours += int(corehours.Int64)
} else {
stats[id.String] = &model.JobsStatistics{
ID: id.String,
TotalJobs: int(jobs.Int64),
TotalWalltime: int(walltime.Int64),
TotalCoreHours: int(corehours.Int64),
}
}
}
}
}
}
if groupBy == nil {
query, qerr := repository.SecurityCheck(ctx, sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration))
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = repository.BuildWhereClause(f, query)
}
if err := query.RunWith(r.DB).QueryRow().Scan(&(stats[""].ShortJobs)); err != nil {
return nil, err
}
} else {
col := groupBy2column[*groupBy]
query, qerr := repository.SecurityCheck(ctx, sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration))
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = repository.BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
return nil, err
}
for rows.Next() {
var id sql.NullString
var shortJobs sql.NullInt64
if err := rows.Scan(&id, &shortJobs); err != nil {
return nil, err
}
if id.Valid {
stats[id.String].ShortJobs = int(shortJobs.Int64)
}
}
}
// Calculating the histogram data is expensive, so only do it if needed.
// An explicit resolver can not be used because we need to know the filters.
histogramsNeeded := false
fields := graphql.CollectFieldsCtx(ctx, nil)
for _, col := range fields {
if col.Name == "histDuration" || col.Name == "histNumNodes" {
histogramsNeeded = true
}
}
res := make([]*model.JobsStatistics, 0, len(stats))
for _, stat := range stats {
res = append(res, stat)
id, col := "", ""
if groupBy != nil {
id = stat.ID
col = groupBy2column[*groupBy]
}
if histogramsNeeded {
var err error
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as int) as value`, time.Now().Unix())
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter, id, col)
if err != nil {
return nil, err
}
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
if err != nil {
return nil, err
}
}
}
return res, nil
}
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
// to add a condition to the query of the kind "<col> = <id>".
func (r *queryResolver) jobsStatisticsHistogram(ctx context.Context, value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
query, qerr := repository.SecurityCheck(ctx, sq.Select(value, "COUNT(job.id) AS count").From("job"))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
query = repository.BuildWhereClause(f, query)
}
if len(id) != 0 && len(col) != 0 {
query = query.Where(col+" = ?", id)
}
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
if err != nil {
return nil, err
}
points := make([]*model.HistoPoint, 0)
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
return nil, err
}
points = append(points, &point)
}
return points, nil
}
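
Note: the helpers removed here are replaced by r.Repo.JobsStatistics in the resolver above. The histogram trick they used, aliasing the grouped expression to "value" so the same GROUP BY and scan code work for every histogram, can be sketched in isolation with the same squirrel builder:

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	// Sketch only; mirrors the removed jobsStatisticsHistogram, not the new
	// repository implementation.
	q := sq.Select("job.num_nodes as value", "COUNT(job.id) AS count").
		From("job").
		GroupBy("value")
	sql, _, _ := q.ToSql()
	fmt.Println(sql)
	// SELECT job.num_nodes as value, COUNT(job.id) AS count FROM job GROUP BY value
}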
const MAX_JOBS_FOR_ANALYSIS = 500 const MAX_JOBS_FOR_ANALYSIS = 500
// Helper function for the rooflineHeatmap GraphQL query placed here so that schema.resolvers.go is not too full. // Helper function for the rooflineHeatmap GraphQL query placed here so that schema.resolvers.go is not too full.
@ -228,10 +27,11 @@ func (r *queryResolver) rooflineHeatmap(
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil) jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil { if err != nil {
log.Error("Error while querying jobs for roofline")
return nil, err return nil, err
} }
if len(jobs) > MAX_JOBS_FOR_ANALYSIS { if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
return nil, fmt.Errorf("too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS) return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
} }
fcols, frows := float64(cols), float64(rows) fcols, frows := float64(cols), float64(rows)
@ -248,19 +48,20 @@ func (r *queryResolver) rooflineHeatmap(
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx) jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
if err != nil { if err != nil {
log.Error("Error while loading metrics for roofline")
return nil, err return nil, err
} }
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"] flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
if flops_ == nil && membw_ == nil { if flops_ == nil && membw_ == nil {
return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %d", job.ID) return nil, fmt.Errorf("GRAPH/STATS > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
} }
flops, ok1 := flops_["node"] flops, ok1 := flops_["node"]
membw, ok2 := membw_["node"] membw, ok2 := membw_["node"]
if !ok1 || !ok2 { if !ok1 || !ok2 {
// TODO/FIXME: // TODO/FIXME:
return nil, errors.New("todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level") return nil, errors.New("GRAPH/STATS > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
} }
for n := 0; n < len(flops.Series); n++ { for n := 0; n < len(flops.Series); n++ {
@ -292,10 +93,11 @@ func (r *queryResolver) rooflineHeatmap(
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) { func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil) jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil { if err != nil {
log.Error("Error while querying jobs for footprint")
return nil, err return nil, err
} }
if len(jobs) > MAX_JOBS_FOR_ANALYSIS { if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
return nil, fmt.Errorf("too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS) return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
} }
avgs := make([][]schema.Float, len(metrics)) avgs := make([][]schema.Float, len(metrics))
@ -310,6 +112,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
} }
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil { if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Error("Error while loading averages for footprint")
return nil, err return nil, err
} }

View File

@ -16,6 +16,7 @@ import (
"time" "time"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
) )
@ -78,6 +79,7 @@ func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
var config CCMetricStoreConfig var config CCMetricStoreConfig
if err := json.Unmarshal(rawConfig, &config); err != nil { if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
return err return err
} }
@ -124,11 +126,13 @@ func (ccms *CCMetricStore) doRequest(
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := json.NewEncoder(buf).Encode(body); err != nil { if err := json.NewEncoder(buf).Encode(body); err != nil {
log.Warn("Error while encoding request body")
return nil, err return nil, err
} }
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf) req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf)
if err != nil { if err != nil {
log.Warn("Error while building request body")
return nil, err return nil, err
} }
if ccms.jwt != "" { if ccms.jwt != "" {
@ -137,6 +141,7 @@ func (ccms *CCMetricStore) doRequest(
res, err := ccms.client.Do(req) res, err := ccms.client.Do(req)
if err != nil { if err != nil {
log.Error("Error while performing request")
return nil, err return nil, err
} }
@ -146,6 +151,7 @@ func (ccms *CCMetricStore) doRequest(
var resBody ApiQueryResponse var resBody ApiQueryResponse
if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil { if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil {
log.Warn("Error while decoding result body")
return nil, err return nil, err
} }
@ -161,6 +167,7 @@ func (ccms *CCMetricStore) LoadData(
topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes) queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
if err != nil { if err != nil {
log.Warn("Error while building queries")
return nil, err return nil, err
} }
@ -175,6 +182,7 @@ func (ccms *CCMetricStore) LoadData(
resBody, err := ccms.doRequest(ctx, &req) resBody, err := ccms.doRequest(ctx, &req)
if err != nil { if err != nil {
log.Error("Error while performing request")
return nil, err return nil, err
} }
@ -202,6 +210,7 @@ func (ccms *CCMetricStore) LoadData(
for _, res := range row { for _, res := range row {
if res.Error != nil { if res.Error != nil {
/* Build list for "partial errors", if any */
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error)) errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
continue continue
} }
@ -245,7 +254,8 @@ func (ccms *CCMetricStore) LoadData(
} }
if len(errors) != 0 { if len(errors) != 0 {
return jobData, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", ")) /* Returns list for "partial errors" */
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
} }
return jobData, nil return jobData, nil
@ -272,8 +282,8 @@ func (ccms *CCMetricStore) buildQueries(
remoteName := ccms.toRemoteName(metric) remoteName := ccms.toRemoteName(metric)
mc := archive.GetMetricConfig(job.Cluster, metric) mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil { if mc == nil {
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster) // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster) log.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
continue continue
} }
@ -483,7 +493,7 @@ func (ccms *CCMetricStore) buildQueries(
continue continue
} }
return nil, nil, fmt.Errorf("TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope) return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
} }
} }
} }
@ -498,6 +508,7 @@ func (ccms *CCMetricStore) LoadStats(
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}) queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode})
if err != nil { if err != nil {
log.Warn("Error while building query")
return nil, err return nil, err
} }
@ -512,6 +523,7 @@ func (ccms *CCMetricStore) LoadStats(
resBody, err := ccms.doRequest(ctx, &req) resBody, err := ccms.doRequest(ctx, &req)
if err != nil { if err != nil {
log.Error("Error while performing request")
return nil, err return nil, err
} }
@ -521,7 +533,7 @@ func (ccms *CCMetricStore) LoadStats(
metric := ccms.toLocalName(query.Metric) metric := ccms.toLocalName(query.Metric)
data := res[0] data := res[0]
if data.Error != nil { if data.Error != nil {
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
} }
metricdata, ok := stats[metric] metricdata, ok := stats[metric]
@ -531,7 +543,7 @@ func (ccms *CCMetricStore) LoadStats(
} }
if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() { if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() {
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN") return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
} }
metricdata[query.Hostname] = schema.MetricStatistics{ metricdata[query.Hostname] = schema.MetricStatistics{
@ -577,6 +589,7 @@ func (ccms *CCMetricStore) LoadNodeData(
resBody, err := ccms.doRequest(ctx, &req) resBody, err := ccms.doRequest(ctx, &req)
if err != nil { if err != nil {
log.Error("Error while performing request")
return nil, err return nil, err
} }
@ -593,11 +606,12 @@ func (ccms *CCMetricStore) LoadNodeData(
metric := ccms.toLocalName(query.Metric) metric := ccms.toLocalName(query.Metric)
qdata := res[0] qdata := res[0]
if qdata.Error != nil { if qdata.Error != nil {
/* Build list for "partial errors", if any */
errors = append(errors, fmt.Sprintf("fetching %s for node %s failed: %s", metric, query.Hostname, *qdata.Error)) errors = append(errors, fmt.Sprintf("fetching %s for node %s failed: %s", metric, query.Hostname, *qdata.Error))
} }
if qdata.Avg.IsNaN() || qdata.Min.IsNaN() || qdata.Max.IsNaN() { if qdata.Avg.IsNaN() || qdata.Min.IsNaN() || qdata.Max.IsNaN() {
// return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN") // return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
qdata.Avg, qdata.Min, qdata.Max = 0., 0., 0. qdata.Avg, qdata.Min, qdata.Max = 0., 0., 0.
} }
@ -627,7 +641,8 @@ func (ccms *CCMetricStore) LoadNodeData(
} }
if len(errors) != 0 { if len(errors) != 0 {
return data, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", ")) /* Returns list of "partial errors" */
return data, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
} }
return data, nil return data, nil

View File

@ -10,11 +10,11 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"log"
"strings" "strings"
"time" "time"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
influxdb2 "github.com/influxdata/influxdb-client-go/v2" influxdb2 "github.com/influxdata/influxdb-client-go/v2"
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api" influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
@ -37,6 +37,7 @@ type InfluxDBv2DataRepository struct {
func (idb *InfluxDBv2DataRepository) Init(rawConfig json.RawMessage) error { func (idb *InfluxDBv2DataRepository) Init(rawConfig json.RawMessage) error {
var config InfluxDBv2DataRepositoryConfig var config InfluxDBv2DataRepositoryConfig
if err := json.Unmarshal(rawConfig, &config); err != nil { if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
return err return err
} }
@ -71,7 +72,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
for _, h := range job.Resources { for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil { if h.HWThreads != nil || h.Accelerators != nil {
// TODO // TODO
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators") return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
} }
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname)) hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
} }
@ -84,7 +85,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
switch scope { switch scope {
case "node": case "node":
// Get Finest Granularity, Group By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows // Get Finest Granularity, Group By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows
// log.Println("Note: Scope 'node' requested. ") // log.Info("Scope 'node' requested. ")
query = fmt.Sprintf(` query = fmt.Sprintf(`
from(bucket: "%s") from(bucket: "%s")
|> range(start: %s, stop: %s) |> range(start: %s, stop: %s)
@ -97,10 +98,10 @@ func (idb *InfluxDBv2DataRepository) LoadData(
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))), idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
measurementsCond, hostsCond) measurementsCond, hostsCond)
case "socket": case "socket":
log.Println("Note: Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ") log.Info("Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
continue continue
case "core": case "core":
log.Println("Note: Scope 'core' requested, but not yet supported: Will return 'node' scope only. ") log.Info(" Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
continue continue
// Get Finest Granularity only, Set NULL to 0.0 // Get Finest Granularity only, Set NULL to 0.0
// query = fmt.Sprintf(` // query = fmt.Sprintf(`
@ -114,13 +115,14 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )), // idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
// measurementsCond, hostsCond) // measurementsCond, hostsCond)
default: default:
log.Println("Note: Unknown Scope requested: Will return 'node' scope. ") log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
continue continue
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node'") // return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support other scopes than 'node'")
} }
rows, err := idb.queryClient.Query(ctx, query) rows, err := idb.queryClient.Query(ctx, query)
if err != nil { if err != nil {
log.Error("Error while performing query")
return nil, err return nil, err
} }
@ -192,6 +194,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// hostSeries.Data = append(hostSeries.Data, schema.Float(val)) // hostSeries.Data = append(hostSeries.Data, schema.Float(val))
// } // }
default: default:
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
continue continue
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'") // return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'")
} }
@ -202,21 +205,22 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// Get Stats // Get Stats
stats, err := idb.LoadStats(job, metrics, ctx) stats, err := idb.LoadStats(job, metrics, ctx)
if err != nil { if err != nil {
log.Warn("Error while loading statistics")
return nil, err return nil, err
} }
for _, scope := range scopes { for _, scope := range scopes {
if scope == "node" { // No 'socket/core' support yet if scope == "node" { // No 'socket/core' support yet
for metric, nodes := range stats { for metric, nodes := range stats {
// log.Println(fmt.Sprintf("<< Add Stats for : Field %s >>", metric)) // log.Debugf("<< Add Stats for : Field %s >>", metric)
for node, stats := range nodes { for node, stats := range nodes {
// log.Println(fmt.Sprintf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg )) // log.Debugf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg )
for index, _ := range jobData[metric][scope].Series { for index, _ := range jobData[metric][scope].Series {
// log.Println(fmt.Sprintf("<< Try to add Stats to Series in Position %d >>", index)) // log.Debugf("<< Try to add Stats to Series in Position %d >>", index)
if jobData[metric][scope].Series[index].Hostname == node { if jobData[metric][scope].Series[index].Hostname == node {
// log.Println(fmt.Sprintf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname)) // log.Debugf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname)
jobData[metric][scope].Series[index].Statistics = &schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max} jobData[metric][scope].Series[index].Statistics = &schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
// log.Println(fmt.Sprintf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg)) // log.Debugf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg)
} }
} }
} }
@ -228,9 +232,9 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// for _, scope := range scopes { // for _, scope := range scopes {
// for _, met := range metrics { // for _, met := range metrics {
// for _, series := range jobData[met][scope].Series { // for _, series := range jobData[met][scope].Series {
// log.Println(fmt.Sprintf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>", // log.Debugf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
// len(series.Data), met, series.Hostname, scope, // len(series.Data), met, series.Hostname, scope,
// series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg)) // series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg)
// } // }
// } // }
// } // }
@ -249,7 +253,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
for _, h := range job.Resources { for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil { if h.HWThreads != nil || h.Accelerators != nil {
// TODO // TODO
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators") return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
} }
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname)) hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
} }
@ -258,7 +262,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
// lenMet := len(metrics) // lenMet := len(metrics)
for _, metric := range metrics { for _, metric := range metrics {
// log.Println(fmt.Sprintf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)) // log.Debugf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)
query := fmt.Sprintf(` query := fmt.Sprintf(`
data = from(bucket: "%s") data = from(bucket: "%s")
@ -275,6 +279,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
rows, err := idb.queryClient.Query(ctx, query) rows, err := idb.queryClient.Query(ctx, query)
if err != nil { if err != nil {
log.Error("Error while performing query")
return nil, err return nil, err
} }
@ -285,17 +290,17 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
avg, avgok := row.ValueByKey("avg").(float64) avg, avgok := row.ValueByKey("avg").(float64)
if !avgok { if !avgok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)) // log.Debugf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)
avg = 0.0 avg = 0.0
} }
min, minok := row.ValueByKey("min").(float64) min, minok := row.ValueByKey("min").(float64)
if !minok { if !minok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)) // log.Debugf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)
min = 0.0 min = 0.0
} }
max, maxok := row.ValueByKey("max").(float64) max, maxok := row.ValueByKey("max").(float64)
if !maxok { if !maxok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)) // log.Debugf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)
max = 0.0 max = 0.0
} }
@ -319,7 +324,7 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) { ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
// TODO : Implement to be used in Analysis- and System/Node-View // TODO : Implement to be used in Analysis- and System/Node-View
log.Println(fmt.Sprintf("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes)) log.Infof("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes)
return nil, errors.New("unimplemented for InfluxDBv2DataRepository") return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
} }

View File

@ -46,6 +46,7 @@ func Init(disableArchive bool) error {
Kind string `json:"kind"` Kind string `json:"kind"`
} }
if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil { if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil {
log.Warn("Error while unmarshaling raw json MetricDataRepository")
return err return err
} }
@ -60,10 +61,11 @@ func Init(disableArchive bool) error {
case "test": case "test":
mdr = &TestMetricDataRepository{} mdr = &TestMetricDataRepository{}
default: default:
return fmt.Errorf("unkown metric data repository '%s' for cluster '%s'", kind.Kind, cluster.Name) return fmt.Errorf("METRICDATA/METRICDATA > Unknown MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
} }
if err := mdr.Init(cluster.MetricDataRepository); err != nil { if err := mdr.Init(cluster.MetricDataRepository); err != nil {
log.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
return err return err
} }
metricDataRepos[cluster.Name] = mdr metricDataRepos[cluster.Name] = mdr
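
Note: Init first unmarshals only the `kind` field and then dispatches to a concrete repository implementation. A condensed sketch of that pattern with a reduced, hypothetical interface:

package main

import (
	"encoding/json"
	"fmt"
)

// metricRepo is a reduced, hypothetical version of the MetricDataRepository interface.
type metricRepo interface {
	Init(rawConfig json.RawMessage) error
}

type ccmsRepo struct{ URL string }

func (r *ccmsRepo) Init(raw json.RawMessage) error { return json.Unmarshal(raw, r) }

func newRepo(raw json.RawMessage) (metricRepo, error) {
	var kind struct {
		Kind string `json:"kind"`
	}
	if err := json.Unmarshal(raw, &kind); err != nil {
		return nil, err
	}
	switch kind.Kind {
	case "cc-metric-store":
		repo := &ccmsRepo{}
		if err := repo.Init(raw); err != nil {
			return nil, err
		}
		return repo, nil
	default:
		return nil, fmt.Errorf("unknown MetricDataRepository %q", kind.Kind)
	}
}

func main() {
	repo, err := newRepo(json.RawMessage(`{"kind":"cc-metric-store","url":"http://localhost:8082"}`))
	fmt.Println(repo, err)
}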
@ -90,7 +92,7 @@ func LoadData(job *schema.Job,
repo, ok := metricDataRepos[job.Cluster] repo, ok := metricDataRepos[job.Cluster]
if !ok { if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster), 0, 0 return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
} }
if scopes == nil { if scopes == nil {
@ -107,8 +109,9 @@ func LoadData(job *schema.Job,
jd, err = repo.LoadData(job, metrics, scopes, ctx) jd, err = repo.LoadData(job, metrics, scopes, ctx)
if err != nil { if err != nil {
if len(jd) != 0 { if len(jd) != 0 {
log.Errorf("partial error: %s", err.Error()) log.Warnf("partial error: %s", err.Error())
} else { } else {
log.Error("Error while loading job data from metric repository")
return err, 0, 0 return err, 0, 0
} }
} }
@ -116,6 +119,7 @@ func LoadData(job *schema.Job,
} else { } else {
jd, err = archive.GetHandle().LoadJobData(job) jd, err = archive.GetHandle().LoadJobData(job)
if err != nil { if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0 return err, 0, 0
} }
@ -163,6 +167,7 @@ func LoadData(job *schema.Job,
}) })
if err, ok := data.(error); ok { if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err return nil, err
} }
@ -182,11 +187,12 @@ func LoadAverages(
repo, ok := metricDataRepos[job.Cluster] repo, ok := metricDataRepos[job.Cluster]
if !ok { if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster) return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
} }
stats, err := repo.LoadStats(job, metrics, ctx) stats, err := repo.LoadStats(job, metrics, ctx)
if err != nil { if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err return err
} }
@ -217,7 +223,7 @@ func LoadNodeData(
repo, ok := metricDataRepos[cluster] repo, ok := metricDataRepos[cluster]
if !ok { if !ok {
return nil, fmt.Errorf("no metric data repository configured for '%s'", cluster) return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
} }
if metrics == nil { if metrics == nil {
@ -229,14 +235,15 @@ func LoadNodeData(
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx) data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil { if err != nil {
if len(data) != 0 { if len(data) != 0 {
log.Errorf("partial error: %s", err.Error()) log.Warnf("partial error: %s", err.Error())
} else { } else {
log.Error("Error while loading node data from metric repository")
return nil, err return nil, err
} }
} }
if data == nil { if data == nil {
return nil, fmt.Errorf("the metric data repository for '%s' does not support this query", cluster) return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
} }
return data, nil return data, nil
@ -303,6 +310,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
jobData, err := LoadData(job, allMetrics, scopes, ctx) jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil { if err != nil {
log.Error("Error wile loading job data for archiving")
return nil, err return nil, err
} }

View File

@ -5,46 +5,46 @@
package metricdata package metricdata
import ( import (
"os" "bytes"
"errors"
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"strings"
"text/template"
"bytes"
"net/http"
"time"
"math" "math"
"sort" "net/http"
"os"
"regexp" "regexp"
"sort"
"strings"
"sync" "sync"
"text/template"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
promapi "github.com/prometheus/client_golang/api" promapi "github.com/prometheus/client_golang/api"
promv1 "github.com/prometheus/client_golang/api/prometheus/v1" promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
promcfg "github.com/prometheus/common/config" promcfg "github.com/prometheus/common/config"
promm "github.com/prometheus/common/model" promm "github.com/prometheus/common/model"
) )
type PrometheusDataRepositoryConfig struct { type PrometheusDataRepositoryConfig struct {
Url string `json:"url"` Url string `json:"url"`
Username string `json:"username,omitempty"` Username string `json:"username,omitempty"`
Suffix string `json:"suffix,omitempty"` Suffix string `json:"suffix,omitempty"`
Templates map[string]string `json:"query-templates"` Templates map[string]string `json:"query-templates"`
} }
type PrometheusDataRepository struct { type PrometheusDataRepository struct {
client promapi.Client client promapi.Client
queryClient promv1.API queryClient promv1.API
suffix string suffix string
templates map[string]*template.Template templates map[string]*template.Template
} }
type PromQLArgs struct { type PromQLArgs struct {
Nodes string Nodes string
} }
type Trie map[rune]Trie type Trie map[rune]Trie
@ -60,10 +60,9 @@ func contains(s []schema.MetricScope, str schema.MetricScope) bool {
return false return false
} }
func MinMaxMean(data []schema.Float) (float64, float64, float64) { func MinMaxMean(data []schema.Float) (float64, float64, float64) {
if len(data) == 0 { if len(data) == 0 {
return 0.0, 0.0, 0.0 return 0.0, 0.0, 0.0
} }
min := math.MaxFloat64 min := math.MaxFloat64
max := -math.MaxFloat64 max := -math.MaxFloat64
@ -75,85 +74,92 @@ func MinMaxMean(data []schema.Float) (float64, float64, float64) {
} }
sum += float64(val) sum += float64(val)
n += 1 n += 1
if float64(val) > max {max = float64(val)} if float64(val) > max {
if float64(val) < min {min = float64(val)} max = float64(val)
}
if float64(val) < min {
min = float64(val)
}
} }
return min, max, sum / n return min, max, sum / n
} }
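Not part of the commit — a minimal sketch of the statistics helper above, assuming it sits in a file inside the metricdata package; the input values are made up:

package metricdata

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// exampleMinMaxMean prints "1 4 2.5" for the made-up series below.
func exampleMinMaxMean() {
	min, max, mean := MinMaxMean([]schema.Float{1, 2, 3, 4})
	fmt.Println(min, max, mean)
}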
// Rewritten from // Rewritten from
// https://github.com/ermanh/trieregex/blob/master/trieregex/trieregex.py // https://github.com/ermanh/trieregex/blob/master/trieregex/trieregex.py
func nodeRegex(nodes []string) string { func nodeRegex(nodes []string) string {
root := Trie{} root := Trie{}
// add runes of each compute node to trie // add runes of each compute node to trie
for _, node := range nodes { for _, node := range nodes {
_trie := root _trie := root
for _, c := range node { for _, c := range node {
if _, ok := _trie[c]; !ok {_trie[c] = Trie{}} if _, ok := _trie[c]; !ok {
_trie = _trie[c] _trie[c] = Trie{}
} }
_trie['*'] = Trie{} _trie = _trie[c]
} }
// recursively build regex from rune trie _trie['*'] = Trie{}
var trieRegex func(trie Trie, reset bool) string }
trieRegex = func(trie Trie, reset bool) string { // recursively build regex from rune trie
if reset == true { var trieRegex func(trie Trie, reset bool) string
trie = root trieRegex = func(trie Trie, reset bool) string {
} if reset == true {
if len(trie) == 0 { trie = root
return "" }
} if len(trie) == 0 {
if len(trie) == 1 { return ""
for key, _trie := range trie { }
if key == '*' { return "" } if len(trie) == 1 {
return regexp.QuoteMeta(string(key)) + trieRegex(_trie, false) for key, _trie := range trie {
} if key == '*' {
} else { return ""
sequences := []string{} }
for key, _trie := range trie { return regexp.QuoteMeta(string(key)) + trieRegex(_trie, false)
if key != '*' { }
sequences = append(sequences, regexp.QuoteMeta(string(key)) + trieRegex(_trie, false)) } else {
} sequences := []string{}
} for key, _trie := range trie {
sort.Slice(sequences, func(i, j int) bool { if key != '*' {
return (-len(sequences[i]) < -len(sequences[j])) || (sequences[i] < sequences[j]) sequences = append(sequences, regexp.QuoteMeta(string(key))+trieRegex(_trie, false))
}) }
var result string }
// single edge from this tree node sort.Slice(sequences, func(i, j int) bool {
if len(sequences) == 1 { return (-len(sequences[i]) < -len(sequences[j])) || (sequences[i] < sequences[j])
result = sequences[0] })
if len(result) > 1 { var result string
result = "(?:" + result + ")" // single edge from this tree node
} if len(sequences) == 1 {
// multiple edges, each length 1 result = sequences[0]
} else if s := strings.Join(sequences, ""); len(s) == len(sequences) { if len(result) > 1 {
// char or numeric range result = "(?:" + result + ")"
if len(s)-1 == int(s[len(s)-1]) - int(s[0]) { }
result = fmt.Sprintf("[%c-%c]", s[0], s[len(s)-1]) // multiple edges, each length 1
// char or numeric set } else if s := strings.Join(sequences, ""); len(s) == len(sequences) {
} else { // char or numeric range
result = "[" + s + "]" if len(s)-1 == int(s[len(s)-1])-int(s[0]) {
} result = fmt.Sprintf("[%c-%c]", s[0], s[len(s)-1])
// multiple edges of different lengths // char or numeric set
} else { } else {
result = "(?:" + strings.Join(sequences, "|") + ")" result = "[" + s + "]"
} }
if _, ok := trie['*']; ok { result += "?"} // multiple edges of different lengths
return result } else {
} result = "(?:" + strings.Join(sequences, "|") + ")"
return "" }
} if _, ok := trie['*']; ok {
return trieRegex(root, true) result += "?"
}
return result
}
return ""
}
return trieRegex(root, true)
} }
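Also not part of the commit — an illustrative call of the trie-based regex builder above (same in-package assumption, made-up node names). Shared prefixes are merged and runs of single-rune branches collapse into character classes; the resulting pattern is presumably what FormatQuery interpolates as PromQLArgs.Nodes into the per-metric query templates:

package metricdata

import "fmt"

// exampleNodeRegex prints "node(?:0[1-3]|10)".
func exampleNodeRegex() {
	fmt.Println(nodeRegex([]string{"node01", "node02", "node03", "node10"}))
}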
func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error { func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
var config PrometheusDataRepositoryConfig var config PrometheusDataRepositoryConfig
// parse config // parse config
if err := json.Unmarshal(rawConfig, &config); err != nil { if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
return err return err
} }
// support basic authentication // support basic authentication
@ -163,15 +169,16 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper) rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
} else { } else {
if config.Username != "" { if config.Username != "" {
return errors.New("Prometheus username provided, but PROMETHEUS_PASSWORD not set.") return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
} }
} }
// init client // init client
client, err := promapi.NewClient(promapi.Config{ client, err := promapi.NewClient(promapi.Config{
Address: config.Url, Address: config.Url,
RoundTripper: rt, RoundTripper: rt,
}) })
if err != nil { if err != nil {
log.Error("Error while initializing new prometheus client")
return err return err
} }
// init query client // init query client
@ -186,15 +193,12 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
if err == nil { if err == nil {
log.Debugf("Added PromQL template for %s: %s", metric, templ) log.Debugf("Added PromQL template for %s: %s", metric, templ)
} else { } else {
log.Errorf("Failed to parse PromQL template %s for metric %s", templ, metric) log.Warnf("Failed to parse PromQL template %s for metric %s", templ, metric)
} }
} }
return nil return nil
} }
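For context — a hypothetical metricDataRepository payload that the Init shown above accepts; the URL, suffix and the single template are placeholders, and in the real cluster config the same JSON object additionally carries the "kind" selector evaluated in metricdata.go. Leaving out "username" avoids the PROMETHEUS_PASSWORD requirement:

package metricdata

import "encoding/json"

// Placeholder configuration; "{{.Nodes}}" is later filled by FormatQuery with
// the host regex built by nodeRegex.
var exampleConfig = json.RawMessage(`{
	"url": "http://localhost:9090",
	"suffix": ".example.org",
	"query-templates": {
		"cpu_load": "node_load1{exported_instance=~\"{{.Nodes}}\"}"
	}
}`)

// examplePrometheusInit parses the placeholder config into a repository instance.
func examplePrometheusInit() error {
	pdb := &PrometheusDataRepository{}
	return pdb.Init(exampleConfig)
}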
// TODO: respect scope argument // TODO: respect scope argument
func (pdb *PrometheusDataRepository) FormatQuery( func (pdb *PrometheusDataRepository) FormatQuery(
metric string, metric string,
@ -213,53 +217,47 @@ func (pdb *PrometheusDataRepository) FormatQuery(
if templ, ok := pdb.templates[metric]; ok { if templ, ok := pdb.templates[metric]; ok {
err := templ.Execute(buf, args) err := templ.Execute(buf, args)
if err != nil { if err != nil {
return "", errors.New(fmt.Sprintf("Error compiling template %v", templ)) return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > Error compiling template %v", templ))
} else { } else {
query := buf.String() query := buf.String()
log.Debugf(fmt.Sprintf("PromQL: %s", query)) log.Debugf("PromQL: %s", query)
return query, nil return query, nil
} }
} else { } else {
return "", errors.New(fmt.Sprintf("No PromQL for metric %s configured.", metric)) return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > No PromQL for metric %s configured.", metric))
} }
} }
// Convert PromAPI row to CC schema.Series // Convert PromAPI row to CC schema.Series
func (pdb *PrometheusDataRepository) RowToSeries( func (pdb *PrometheusDataRepository) RowToSeries(
from time.Time, from time.Time,
step int64, step int64,
steps int64, steps int64,
row *promm.SampleStream) (schema.Series) { row *promm.SampleStream) schema.Series {
ts := from.Unix() ts := from.Unix()
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix) hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
// init array of expected length with NaN // init array of expected length with NaN
values := make([]schema.Float, steps + 1) values := make([]schema.Float, steps+1)
for i, _ := range values { for i, _ := range values {
values[i] = schema.NaN values[i] = schema.NaN
}
// copy recorded values from prom sample pair
for _, v := range row.Values {
idx := (v.Timestamp.Unix() - ts) / step
values[idx] = schema.Float(v.Value)
}
min, max, mean := MinMaxMean(values)
// output struct
return schema.Series{
Hostname: hostname,
Data: values,
Statistics: &schema.MetricStatistics{
Avg: mean,
Min: min,
Max: max,
},
}
} }
// copy recorded values from prom sample pair
for _, v := range row.Values {
idx := (v.Timestamp.Unix() - ts) / step
values[idx] = schema.Float(v.Value)
}
min, max, mean := MinMaxMean(values)
// output struct
return schema.Series{
Hostname: hostname,
Data: values,
Statistics: &schema.MetricStatistics{
Avg: mean,
Min: min,
Max: max,
},
}
}
func (pdb *PrometheusDataRepository) LoadData( func (pdb *PrometheusDataRepository) LoadData(
job *schema.Job, job *schema.Job,
@ -268,7 +266,7 @@ func (pdb *PrometheusDataRepository) LoadData(
ctx context.Context) (schema.JobData, error) { ctx context.Context) (schema.JobData, error) {
// TODO respect requested scope // TODO respect requested scope
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode){ if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
scopes = append(scopes, schema.MetricScopeNode) scopes = append(scopes, schema.MetricScopeNode)
} }
@ -283,36 +281,38 @@ func (pdb *PrometheusDataRepository) LoadData(
for _, scope := range scopes { for _, scope := range scopes {
if scope != schema.MetricScopeNode { if scope != schema.MetricScopeNode {
logOnce.Do(func(){log.Infof(fmt.Sprintf("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope))}) logOnce.Do(func() {
log.Infof("Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue continue
} }
for _, metric := range metrics { for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(job.Cluster, metric) metricConfig := archive.GetMetricConfig(job.Cluster, metric)
if metricConfig == nil { if metricConfig == nil {
log.Errorf(fmt.Sprintf("Error in LoadData: Metric %s for cluster %s not configured", log.Warnf("Error in LoadData: Metric %s for cluster %s not configured", metric, job.Cluster)
metric, job.Cluster)) return nil, errors.New("Prometheus config error")
return nil, errors.New("Prometheus querry error")
} }
query, err := pdb.FormatQuery(metric, scope, nodes, job.Cluster) query, err := pdb.FormatQuery(metric, scope, nodes, job.Cluster)
if err != nil { if err != nil {
log.Warn("Error while formatting prometheus query")
return nil, err return nil, err
} }
// ranged query over all job nodes // ranged query over all job nodes
r := promv1.Range{ r := promv1.Range{
Start: from, Start: from,
End: to, End: to,
Step: time.Duration(metricConfig.Timestep * 1e9), Step: time.Duration(metricConfig.Timestep * 1e9),
} }
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r) result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil { if err != nil {
log.Errorf(fmt.Sprintf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)) log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
return nil, errors.New("Prometheus querry error") return nil, errors.New("Prometheus query error")
} }
if len(warnings) > 0 { if len(warnings) > 0 {
log.Warnf(fmt.Sprintf("Warnings: %v\n", warnings)) log.Warnf("Warnings: %v\n", warnings)
} }
// init data structures // init data structures
@ -338,16 +338,13 @@ func (pdb *PrometheusDataRepository) LoadData(
} }
// sort by hostname to get uniform coloring // sort by hostname to get uniform coloring
sort.Slice(jobMetric.Series, func(i, j int) bool { sort.Slice(jobMetric.Series, func(i, j int) bool {
return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname) return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname)
}) })
} }
} }
return jobData, nil return jobData, nil
} }
// TODO change implementation to precomputed/cached stats // TODO change implementation to precomputed/cached stats
func (pdb *PrometheusDataRepository) LoadStats( func (pdb *PrometheusDataRepository) LoadStats(
job *schema.Job, job *schema.Job,
@ -359,6 +356,7 @@ func (pdb *PrometheusDataRepository) LoadStats(
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx) data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
if err != nil { if err != nil {
log.Warn("Error while loading job for stats")
return nil, err return nil, err
} }
for metric, metricData := range data { for metric, metricData := range data {
@ -371,9 +369,6 @@ func (pdb *PrometheusDataRepository) LoadStats(
return stats, nil return stats, nil
} }
func (pdb *PrometheusDataRepository) LoadNodeData( func (pdb *PrometheusDataRepository) LoadNodeData(
cluster string, cluster string,
metrics, nodes []string, metrics, nodes []string,
@ -390,35 +385,37 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
} }
for _, scope := range scopes { for _, scope := range scopes {
if scope != schema.MetricScopeNode { if scope != schema.MetricScopeNode {
logOnce.Do(func(){log.Infof(fmt.Sprintf("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope))}) logOnce.Do(func() {
log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue continue
} }
for _, metric := range metrics { for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(cluster, metric) metricConfig := archive.GetMetricConfig(cluster, metric)
if metricConfig == nil { if metricConfig == nil {
log.Errorf(fmt.Sprintf("Error in LoadNodeData: Metric %s for cluster %s not configured", log.Warnf("Error in LoadNodeData: Metric %s for cluster %s not configured", metric, cluster)
metric, cluster)) return nil, errors.New("Prometheus config error")
return nil, errors.New("Prometheus querry error")
} }
query, err := pdb.FormatQuery(metric, scope, nodes, cluster) query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
if err != nil { if err != nil {
log.Warn("Error while formatting prometheus query")
return nil, err return nil, err
} }
// ranged query over all nodes // ranged query over all nodes
r := promv1.Range{ r := promv1.Range{
Start: from, Start: from,
End: to, End: to,
Step: time.Duration(metricConfig.Timestep * 1e9), Step: time.Duration(metricConfig.Timestep * 1e9),
} }
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r) result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil { if err != nil {
log.Errorf(fmt.Sprintf("Prometheus query error in LoadNodeData: %v\n", err)) log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
return nil, errors.New("Prometheus querry error") return nil, errors.New("Prometheus query error")
} }
if len(warnings) > 0 { if len(warnings) > 0 {
log.Warnf(fmt.Sprintf("Warnings: %v\n", warnings)) log.Warnf("Warnings: %v\n", warnings)
} }
step := int64(metricConfig.Timestep) step := int64(metricConfig.Timestep)
@ -437,13 +434,13 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
Unit: metricConfig.Unit, Unit: metricConfig.Unit,
Scope: scope, Scope: scope,
Timestep: metricConfig.Timestep, Timestep: metricConfig.Timestep,
Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)}, Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)},
}, },
) )
} }
} }
} }
t1 := time.Since(t0) t1 := time.Since(t0)
log.Debugf(fmt.Sprintf("LoadNodeData of %v nodes took %s", len(data), t1)) log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
return data, nil return data, nil
} }

View File

@ -5,12 +5,15 @@
package repository package repository
import ( import (
"database/sql"
"fmt" "fmt"
"log" "log"
"sync" "sync"
"time" "time"
"github.com/jmoiron/sqlx" "github.com/jmoiron/sqlx"
"github.com/mattn/go-sqlite3"
"github.com/qustavo/sqlhooks/v2"
) )
var ( var (
@ -19,7 +22,8 @@ var (
) )
type DBConnection struct { type DBConnection struct {
DB *sqlx.DB DB *sqlx.DB
Driver string
} }
func Connect(driver string, db string) { func Connect(driver string, db string) {
@ -28,7 +32,9 @@ func Connect(driver string, db string) {
dbConnOnce.Do(func() { dbConnOnce.Do(func() {
if driver == "sqlite3" { if driver == "sqlite3" {
dbHandle, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", db)) sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
dbHandle, err = sqlx.Open("sqlite3WithHooks", fmt.Sprintf("%s?_foreign_keys=on", db))
// dbHandle, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", db))
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
@ -39,7 +45,7 @@ func Connect(driver string, db string) {
} else if driver == "mysql" { } else if driver == "mysql" {
dbHandle, err = sqlx.Open("mysql", fmt.Sprintf("%s?multiStatements=true", db)) dbHandle, err = sqlx.Open("mysql", fmt.Sprintf("%s?multiStatements=true", db))
if err != nil { if err != nil {
log.Fatal(err) log.Fatalf("sqlx.Open() error: %v", err)
} }
dbHandle.SetConnMaxLifetime(time.Minute * 3) dbHandle.SetConnMaxLifetime(time.Minute * 3)
@ -49,7 +55,8 @@ func Connect(driver string, db string) {
log.Fatalf("unsupported database driver: %s", driver) log.Fatalf("unsupported database driver: %s", driver)
} }
dbConnInstance = &DBConnection{DB: dbHandle} dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
checkDBVersion(driver, dbHandle.DB)
}) })
} }

View File

@ -0,0 +1,28 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
// Hooks satisfies the sqlhook.Hooks interface
type Hooks struct{}
// Before hook will print the query with its args and return the context with the timestamp
func (h *Hooks) Before(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
log.Infof("SQL query %s %q", query, args)
return context.WithValue(ctx, "begin", time.Now()), nil
}
// After hook will get the timestamp registered on the Before hook and print the elapsed time
func (h *Hooks) After(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
begin := ctx.Value("begin").(time.Time)
log.Infof("Took: %s\n", time.Since(begin))
return ctx, nil
}
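Not part of the commit — a self-contained sketch of how the hook pair above is meant to be used once dbconnection.go registers the wrapped driver. The driver name is taken from the diff; the database path, query and cluster name are placeholders:

package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"
	"time"

	"github.com/jmoiron/sqlx"
	"github.com/mattn/go-sqlite3"
	"github.com/qustavo/sqlhooks/v2"
)

// Hooks mirrors internal/repository/hooks.go: Before logs the statement and
// its arguments, After logs the elapsed time carried via the context.
type Hooks struct{}

func (h *Hooks) Before(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
	log.Printf("SQL query %s %q", query, args)
	return context.WithValue(ctx, "begin", time.Now()), nil
}

func (h *Hooks) After(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
	log.Printf("took: %s", time.Since(ctx.Value("begin").(time.Time)))
	return ctx, nil
}

func main() {
	// Same registration pattern as in repository.Connect().
	sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))

	// Placeholder path to an existing cc-backend sqlite database.
	db, err := sqlx.Open("sqlite3WithHooks", "./var/job.db?_foreign_keys=on")
	if err != nil {
		log.Fatal(err)
	}

	var cnt int
	// Every statement now produces one Before and one After log line.
	if err := db.Get(&cnt, "SELECT COUNT(*) FROM job WHERE job.cluster = ?", "testcluster"); err != nil {
		log.Fatal(err)
	}
	fmt.Println("jobs:", cnt)
}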

View File

@ -19,67 +19,6 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
) )
// `AUTO_INCREMENT` is in a comment because of this hack:
// https://stackoverflow.com/a/41028314 (sqlite creates unique ids automatically)
const JobsDBSchema string = `
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tag;
CREATE TABLE job (
id INTEGER PRIMARY KEY /*!40101 AUTO_INCREMENT */,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
` + "`partition`" + ` VARCHAR(255) NOT NULL, -- partition is a keyword in mysql -.-
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
CREATE TABLE tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
CREATE TABLE jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
`
// Indexes are created after the job-archive is traversed for faster inserts.
const JobsDbIndexes string = `
CREATE INDEX job_by_user ON job (user);
CREATE INDEX job_by_starttime ON job (start_time);
CREATE INDEX job_by_job_id ON job (job_id);
CREATE INDEX job_by_state ON job (job_state);
`
const NamedJobInsert string = `INSERT INTO job ( const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc, job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data, exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
@ -95,40 +34,44 @@ func HandleImportFlag(flag string) error {
for _, pair := range strings.Split(flag, ",") { for _, pair := range strings.Split(flag, ",") {
files := strings.Split(pair, ":") files := strings.Split(pair, ":")
if len(files) != 2 { if len(files) != 2 {
return fmt.Errorf("invalid import flag format") return fmt.Errorf("REPOSITORY/INIT > invalid import flag format")
} }
raw, err := os.ReadFile(files[0]) raw, err := os.ReadFile(files[0])
if err != nil { if err != nil {
log.Warn("Error while reading metadata file for import")
return err return err
} }
if config.Keys.Validate { if config.Keys.Validate {
if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil { if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil {
return fmt.Errorf("validate job meta: %v", err) return fmt.Errorf("REPOSITORY/INIT > validate job meta: %v", err)
} }
} }
dec := json.NewDecoder(bytes.NewReader(raw)) dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields() dec.DisallowUnknownFields()
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults} jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
if err := dec.Decode(&jobMeta); err != nil { if err := dec.Decode(&jobMeta); err != nil {
log.Warn("Error while decoding raw json metadata for import")
return err return err
} }
raw, err = os.ReadFile(files[1]) raw, err = os.ReadFile(files[1])
if err != nil { if err != nil {
log.Warn("Error while reading jobdata file for import")
return err return err
} }
if config.Keys.Validate { if config.Keys.Validate {
if err := schema.Validate(schema.Data, bytes.NewReader(raw)); err != nil { if err := schema.Validate(schema.Data, bytes.NewReader(raw)); err != nil {
return fmt.Errorf("validate job data: %v", err) return fmt.Errorf("REPOSITORY/INIT > validate job data: %v", err)
} }
} }
dec = json.NewDecoder(bytes.NewReader(raw)) dec = json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields() dec.DisallowUnknownFields()
jobData := schema.JobData{} jobData := schema.JobData{}
if err := dec.Decode(&jobData); err != nil { if err := dec.Decode(&jobData); err != nil {
log.Warn("Error while decoding raw json jobdata for import")
return err return err
} }
@ -136,10 +79,11 @@ func HandleImportFlag(flag string) error {
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
if job, err := GetJobRepository().Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows { if job, err := GetJobRepository().Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
if err != nil { if err != nil {
log.Warn("Error while finding job in jobRepository")
return err return err
} }
return fmt.Errorf("a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID) return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
} }
job := schema.Job{ job := schema.Job{
@ -155,38 +99,45 @@ func HandleImportFlag(flag string) error {
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw") job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources) job.RawResources, err = json.Marshal(job.Resources)
if err != nil { if err != nil {
log.Warn("Error while marshaling job resources")
return err return err
} }
job.RawMetaData, err = json.Marshal(job.MetaData) job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil { if err != nil {
log.Warn("Error while marshaling job metadata")
return err return err
} }
if err := SanityChecks(&job.BaseJob); err != nil { if err := SanityChecks(&job.BaseJob); err != nil {
log.Warn("BaseJob SanityChecks failed")
return err return err
} }
if err := archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil { if err := archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
log.Error("Error while importing job")
return err return err
} }
res, err := GetConnection().DB.NamedExec(NamedJobInsert, job) res, err := GetConnection().DB.NamedExec(NamedJobInsert, job)
if err != nil { if err != nil {
log.Warn("Error while NamedJobInsert")
return err return err
} }
id, err := res.LastInsertId() id, err := res.LastInsertId()
if err != nil { if err != nil {
log.Warn("Error while getting last insert ID")
return err return err
} }
for _, tag := range job.Tags { for _, tag := range job.Tags {
if _, err := GetJobRepository().AddTagOrCreate(id, tag.Type, tag.Name); err != nil { if _, err := GetJobRepository().AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
log.Error("Error while adding or creating tag")
return err return err
} }
} }
log.Infof("Successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id) log.Infof("successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
} }
return nil return nil
} }
@ -198,21 +149,17 @@ func InitDB() error {
starttime := time.Now() starttime := time.Now()
log.Print("Building job table...") log.Print("Building job table...")
// Basic database structure:
_, err := db.DB.Exec(JobsDBSchema)
if err != nil {
return err
}
// Inserts are bundled into transactions because in sqlite, // Inserts are bundled into transactions because in sqlite,
// that speeds up inserts A LOT. // that speeds up inserts A LOT.
tx, err := db.DB.Beginx() tx, err := db.DB.Beginx()
if err != nil { if err != nil {
log.Warn("Error while bundling transactions")
return err return err
} }
stmt, err := tx.PrepareNamed(NamedJobInsert) stmt, err := tx.PrepareNamed(NamedJobInsert)
if err != nil { if err != nil {
log.Warn("Error while preparing namedJobInsert")
return err return err
} }
tags := make(map[string]int64) tags := make(map[string]int64)
@ -232,12 +179,14 @@ func InitDB() error {
if i%10 == 0 { if i%10 == 0 {
if tx != nil { if tx != nil {
if err := tx.Commit(); err != nil { if err := tx.Commit(); err != nil {
log.Warn("Error while committing transactions for jobMeta")
return err return err
} }
} }
tx, err = db.DB.Beginx() tx, err = db.DB.Beginx()
if err != nil { if err != nil {
log.Warn("Error while bundling transactions for jobMeta")
return err return err
} }
@ -260,34 +209,34 @@ func InitDB() error {
job.RawResources, err = json.Marshal(job.Resources) job.RawResources, err = json.Marshal(job.Resources)
if err != nil { if err != nil {
log.Errorf("repository initDB()- %v", err) log.Errorf("repository initDB(): %v", err)
errorOccured++ errorOccured++
continue continue
} }
job.RawMetaData, err = json.Marshal(job.MetaData) job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil { if err != nil {
log.Errorf("repository initDB()- %v", err) log.Errorf("repository initDB(): %v", err)
errorOccured++ errorOccured++
continue continue
} }
if err := SanityChecks(&job.BaseJob); err != nil { if err := SanityChecks(&job.BaseJob); err != nil {
log.Errorf("repository initDB()- %v", err) log.Errorf("repository initDB(): %v", err)
errorOccured++ errorOccured++
continue continue
} }
res, err := stmt.Exec(job) res, err := stmt.Exec(job)
if err != nil { if err != nil {
log.Errorf("repository initDB()- %v", err) log.Errorf("repository initDB(): %v", err)
errorOccured++ errorOccured++
continue continue
} }
id, err := res.LastInsertId() id, err := res.LastInsertId()
if err != nil { if err != nil {
log.Errorf("repository initDB()- %v", err) log.Errorf("repository initDB(): %v", err)
errorOccured++ errorOccured++
continue continue
} }
@ -298,16 +247,19 @@ func InitDB() error {
if !ok { if !ok {
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type) res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
if err != nil { if err != nil {
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
return err return err
} }
tagId, err = res.LastInsertId() tagId, err = res.LastInsertId()
if err != nil { if err != nil {
log.Warn("Error while getting last insert ID")
return err return err
} }
tags[tagstr] = tagId tags[tagstr] = tagId
} }
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil { if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", id, tagId)
return err return err
} }
} }
@ -318,16 +270,11 @@ func InitDB() error {
} }
if errorOccured > 0 { if errorOccured > 0 {
log.Errorf("Error in import of %d jobs!", errorOccured) log.Warnf("Error in import of %d jobs!", errorOccured)
} }
if err := tx.Commit(); err != nil { if err := tx.Commit(); err != nil {
return err log.Warn("Error while committing SQL transactions")
}
// Create indexes after inserts so that they do not
// need to be continually updated.
if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
return err return err
} }
@ -338,13 +285,14 @@ func InitDB() error {
// This function also sets the subcluster if necessary! // This function also sets the subcluster if necessary!
func SanityChecks(job *schema.BaseJob) error { func SanityChecks(job *schema.BaseJob) error {
if c := archive.GetCluster(job.Cluster); c == nil { if c := archive.GetCluster(job.Cluster); c == nil {
return fmt.Errorf("no such cluster: %#v", job.Cluster) return fmt.Errorf("no such cluster: %v", job.Cluster)
} }
if err := archive.AssignSubCluster(job); err != nil { if err := archive.AssignSubCluster(job); err != nil {
log.Warn("Error while assigning subcluster to job")
return err return err
} }
if !job.State.Valid() { if !job.State.Valid() {
return fmt.Errorf("not a valid job state: %#v", job.State) return fmt.Errorf("not a valid job state: %v", job.State)
} }
if len(job.Resources) == 0 || len(job.User) == 0 { if len(job.Resources) == 0 || len(job.User) == 0 {
return fmt.Errorf("'resources' and 'user' should not be empty") return fmt.Errorf("'resources' and 'user' should not be empty")

View File

@ -14,9 +14,11 @@ import (
"sync" "sync"
"time" "time"
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache" "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
@ -30,7 +32,8 @@ var (
) )
type JobRepository struct { type JobRepository struct {
DB *sqlx.DB DB *sqlx.DB
driver string
stmtCache *sq.StmtCache stmtCache *sq.StmtCache
cache *lrucache.Cache cache *lrucache.Cache
@ -44,9 +47,11 @@ func GetJobRepository() *JobRepository {
db := GetConnection() db := GetConnection()
jobRepoInstance = &JobRepository{ jobRepoInstance = &JobRepository{
DB: db.DB, DB: db.DB,
stmtCache: sq.NewStmtCache(db.DB), driver: db.Driver,
cache: lrucache.New(1024 * 1024),
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
archiveChannel: make(chan *schema.Job, 128), archiveChannel: make(chan *schema.Job, 128),
} }
// start archiving worker // start archiving worker
@ -67,14 +72,20 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
if err := row.Scan( if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId, &job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State, &job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources /*&job.MetaData*/); err != nil { &job.Duration, &job.Walltime, &job.RawResources /*&job.RawMetaData*/); err != nil {
log.Warn("Error while scanning rows")
return nil, err return nil, err
} }
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil { if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
log.Warn("Error while unmarhsaling raw resources json")
return nil, err return nil, err
} }
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
// return nil, err
// }
job.StartTime = time.Unix(job.StartTimeUnix, 0) job.StartTime = time.Unix(job.StartTimeUnix, 0)
if job.Duration == 0 && job.State == schema.JobStateRunning { if job.Duration == 0 && job.State == schema.JobStateRunning {
job.Duration = int32(time.Since(job.StartTime).Seconds()) job.Duration = int32(time.Since(job.StartTime).Seconds())
@ -84,11 +95,14 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
return job, nil return job, nil
} }
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) { func (r *JobRepository) FetchJobName(job *schema.Job) (*string, error) {
start := time.Now()
cachekey := fmt.Sprintf("metadata:%d", job.ID) cachekey := fmt.Sprintf("metadata:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil { if cached := r.cache.Get(cachekey, nil); cached != nil {
job.MetaData = cached.(map[string]string) job.MetaData = cached.(map[string]string)
return job.MetaData, nil if jobName := job.MetaData["jobName"]; jobName != "" {
return &jobName, nil
}
} }
if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID). if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
@ -105,6 +119,40 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
} }
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour) r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
log.Infof("Timer FetchJobName %s", time.Since(start))
if jobName := job.MetaData["jobName"]; jobName != "" {
return &jobName, nil
} else {
return new(string), nil
}
}
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
start := time.Now()
cachekey := fmt.Sprintf("metadata:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.MetaData = cached.(map[string]string)
return job.MetaData, nil
}
if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawMetaData); err != nil {
log.Warn("Error while scanning for job metadata")
return nil, err
}
if len(job.RawMetaData) == 0 {
return nil, nil
}
if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
log.Warn("Error while unmarshaling raw metadata json")
return nil, err
}
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
log.Infof("Timer FetchMetadata %s", time.Since(start))
return job.MetaData, nil return job.MetaData, nil
} }
@ -113,6 +161,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
r.cache.Del(cachekey) r.cache.Del(cachekey)
if job.MetaData == nil { if job.MetaData == nil {
if _, err = r.FetchMetadata(job); err != nil { if _, err = r.FetchMetadata(job); err != nil {
log.Warnf("Error while fetching metadata for job, DB ID '%v'", job.ID)
return err return err
} }
} }
@ -129,10 +178,12 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
} }
if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil { if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
log.Warnf("Error while marshaling metadata for job, DB ID '%v'", job.ID)
return err return err
} }
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil { if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
return err return err
} }
@ -150,6 +201,7 @@ func (r *JobRepository) Find(
cluster *string, cluster *string,
startTime *int64) (*schema.Job, error) { startTime *int64) (*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job"). q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId) Where("job.job_id = ?", *jobId)
@ -160,6 +212,7 @@ func (r *JobRepository) Find(
q = q.Where("job.start_time = ?", *startTime) q = q.Where("job.start_time = ?", *startTime)
} }
log.Infof("Timer Find %s", time.Since(start))
return scanJob(q.RunWith(r.stmtCache).QueryRow()) return scanJob(q.RunWith(r.stmtCache).QueryRow())
} }
@ -173,6 +226,7 @@ func (r *JobRepository) FindAll(
cluster *string, cluster *string,
startTime *int64) ([]*schema.Job, error) { startTime *int64) ([]*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job"). q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId) Where("job.job_id = ?", *jobId)
@ -185,6 +239,7 @@ func (r *JobRepository) FindAll(
rows, err := q.RunWith(r.stmtCache).Query() rows, err := q.RunWith(r.stmtCache).Query()
if err != nil { if err != nil {
log.Error("Error while running query")
return nil, err return nil, err
} }
@ -192,10 +247,12 @@ func (r *JobRepository) FindAll(
for rows.Next() { for rows.Next() {
job, err := scanJob(rows) job, err := scanJob(rows)
if err != nil { if err != nil {
log.Warn("Error while scanning rows")
return nil, err return nil, err
} }
jobs = append(jobs, job) jobs = append(jobs, job)
} }
log.Infof("Timer FindAll %s", time.Since(start))
return jobs, nil return jobs, nil
} }
@ -214,12 +271,12 @@ func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) { func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
job.RawResources, err = json.Marshal(job.Resources) job.RawResources, err = json.Marshal(job.Resources)
if err != nil { if err != nil {
return -1, fmt.Errorf("encoding resources field failed: %w", err) return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
} }
job.RawMetaData, err = json.Marshal(job.MetaData) job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil { if err != nil {
return -1, fmt.Errorf("encoding metaData field failed: %w", err) return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
} }
res, err := r.DB.NamedExec(`INSERT INTO job ( res, err := r.DB.NamedExec(`INSERT INTO job (
@ -259,7 +316,7 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
err := r.DB.Get(&cnt, qs) //ignore error as it will also occur in delete statement err := r.DB.Get(&cnt, qs) //ignore error as it will also occur in delete statement
_, err = r.DB.Exec(`DELETE FROM job WHERE job.start_time < ?`, startTime) _, err = r.DB.Exec(`DELETE FROM job WHERE job.start_time < ?`, startTime)
if err != nil { if err != nil {
log.Warnf(" DeleteJobsBefore(%d): error %v", startTime, err) log.Errorf(" DeleteJobsBefore(%d): error %#v", startTime, err)
} else { } else {
log.Infof("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt) log.Infof("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
} }
@ -269,7 +326,7 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
func (r *JobRepository) DeleteJobById(id int64) error { func (r *JobRepository) DeleteJobById(id int64) error {
_, err := r.DB.Exec(`DELETE FROM job WHERE job.id = ?`, id) _, err := r.DB.Exec(`DELETE FROM job WHERE job.id = ?`, id)
if err != nil { if err != nil {
log.Warnf("DeleteJobById(%d): error %v", id, err) log.Errorf("DeleteJobById(%d): error %#v", id, err)
} else { } else {
log.Infof("DeleteJobById(%d): Success", id) log.Infof("DeleteJobById(%d): Success", id)
} }
@ -278,6 +335,7 @@ func (r *JobRepository) DeleteJobById(id int64) error {
// TODO: Use node hours instead: SELECT job.user, sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN CAST(strftime('%s', 'now') AS INTEGER) - job.start_time ELSE job.duration END)) as x FROM job GROUP BY user ORDER BY x DESC; // TODO: Use node hours instead: SELECT job.user, sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN CAST(strftime('%s', 'now') AS INTEGER) - job.start_time ELSE job.duration END)) as x FROM job GROUP BY user ORDER BY x DESC;
func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggregate, filters []*model.JobFilter, weight *model.Weights, limit *int) (map[string]int, error) { func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggregate, filters []*model.JobFilter, weight *model.Weights, limit *int) (map[string]int, error) {
start := time.Now()
if !aggreg.IsValid() { if !aggreg.IsValid() {
return nil, errors.New("invalid aggregate") return nil, errors.New("invalid aggregate")
} }
@ -292,10 +350,12 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
now := time.Now().Unix() now := time.Now().Unix()
count = fmt.Sprintf(`sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) as count`, now) count = fmt.Sprintf(`sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) as count`, now)
runner = r.DB runner = r.DB
default:
log.Infof("CountGroupedJobs() Weight %v unknown.", *weight)
} }
} }
q, qerr := SecurityCheck(ctx, sq.Select("job."+string(aggreg), count).From("job").GroupBy("job." + string(aggreg)).OrderBy("count DESC")) q, qerr := SecurityCheck(ctx, sq.Select("job."+string(aggreg), count).From("job").GroupBy("job."+string(aggreg)).OrderBy("count DESC"))
if qerr != nil { if qerr != nil {
return nil, qerr return nil, qerr
@ -311,6 +371,7 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
counts := map[string]int{} counts := map[string]int{}
rows, err := q.RunWith(runner).Query() rows, err := q.RunWith(runner).Query()
if err != nil { if err != nil {
log.Error("Error while running query")
return nil, err return nil, err
} }
@ -318,12 +379,14 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
var group string var group string
var count int var count int
if err := rows.Scan(&group, &count); err != nil { if err := rows.Scan(&group, &count); err != nil {
log.Warn("Error while scanning rows")
return nil, err return nil, err
} }
counts[group] = count counts[group] = count
} }
log.Infof("Timer CountGroupedJobs %s", time.Since(start))
return counts, nil return counts, nil
} }
@ -360,20 +423,23 @@ func (r *JobRepository) MarkArchived(
stmt = stmt.Set("net_bw_avg", stats.Avg) stmt = stmt.Set("net_bw_avg", stats.Avg)
case "file_bw": case "file_bw":
stmt = stmt.Set("file_bw_avg", stats.Avg) stmt = stmt.Set("file_bw_avg", stats.Avg)
default:
log.Infof("MarkArchived() Metric '%v' unknown", metric)
} }
} }
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil { if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
log.Warn("Error while marking job as archived")
return err return err
} }
return nil return nil
} }
// Archiving worker thread // Archiving worker thread
func (r *JobRepository) archivingWorker(){ func (r *JobRepository) archivingWorker() {
for { for {
select { select {
case job, ok := <- r.archiveChannel: case job, ok := <-r.archiveChannel:
if !ok { if !ok {
break break
} }
@ -407,54 +473,216 @@ func (r *JobRepository) archivingWorker(){
} }
// Trigger async archiving // Trigger async archiving
func (r *JobRepository) TriggerArchiving(job *schema.Job){ func (r *JobRepository) TriggerArchiving(job *schema.Job) {
r.archivePending.Add(1) r.archivePending.Add(1)
r.archiveChannel <- job r.archiveChannel <- job
} }
// Wait for background thread to finish pending archiving operations // Wait for background thread to finish pending archiving operations
func (r *JobRepository) WaitForArchiving(){ func (r *JobRepository) WaitForArchiving() {
// close channel and wait for worker to process remaining jobs // close channel and wait for worker to process remaining jobs
r.archivePending.Wait() r.archivePending.Wait()
} }
var ErrNotFound = errors.New("no such job or user") var ErrNotFound = errors.New("no such jobname, project or user")
var ErrForbidden = errors.New("not authorized")
// FindJobOrUser returns a job database ID or a username if a job or user matches the search term. // FindJobnameOrUserOrProject returns a jobName or a username or a projectId if a jobName or user or project matches the search term.
// As 0 is a valid job id, check if username is "" instead in order to check what matched. // If query is found to be an integer (= conversion to INT datatype succeeds), skip back to parent call
// If nothing matches the search, `ErrNotFound` is returned. // If nothing matches the search, `ErrNotFound` is returned.
func (r *JobRepository) FindJobOrUser(ctx context.Context, searchterm string) (job int64, username string, err error) {
func (r *JobRepository) FindJobnameOrUserOrProject(ctx context.Context, searchterm string) (metasnip string, username string, project string, err error) {
user := auth.GetUser(ctx) user := auth.GetUser(ctx)
if id, err := strconv.Atoi(searchterm); err == nil { if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
qb := sq.Select("job.id").From("job").Where("job.job_id = ?", id) return "", "", "", nil
} else { // has to have letters
if user != nil && user.HasNotRoles([]string{auth.RoleAdmin, auth.RoleSupport}) { if user != nil && user.HasNotRoles([]string{auth.RoleAdmin, auth.RoleSupport}) {
qb = qb.Where("job.user = ?", user.Username) err := sq.Select("job.user").Distinct().From("job").
Where("job.user = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&username)
if err != nil && err != sql.ErrNoRows {
return "", "", "", err
} else if err == nil {
return "", username, "", nil
}
if username == "" { // Try with Name2Username query
errtwo := sq.Select("user.username").Distinct().From("user").
Where("user.name LIKE ?", fmt.Sprint("%"+searchterm+"%")).
RunWith(r.stmtCache).QueryRow().Scan(&username)
if errtwo != nil && errtwo != sql.ErrNoRows {
return "", "", "", errtwo
} else if errtwo == nil {
return "", username, "", nil
}
}
} }
err := qb.RunWith(r.stmtCache).QueryRow().Scan(&job) if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
err := sq.Select("job.project").Distinct().From("job").
Where("job.project = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&project)
if err != nil && err != sql.ErrNoRows {
return "", "", "", err
} else if err == nil {
return "", "", project, nil
}
}
// All authorizations: if the unlabeled query is neither a username nor a projectId, try to match it as a job name against the metadata; on a hit, the parent method redirects to the jobName GQL query
err := sq.Select("job.cluster").Distinct().From("job").
Where("job.meta_data LIKE ?", "%"+searchterm+"%").
RunWith(r.stmtCache).QueryRow().Scan(&metasnip)
if err != nil && err != sql.ErrNoRows { if err != nil && err != sql.ErrNoRows {
return 0, "", err return "", "", "", err
} else if err == nil { } else if err == nil {
return job, "", nil return metasnip[0:1], "", "", nil
} }
}
return "", "", "", ErrNotFound
}
}
func (r *JobRepository) FindUser(ctx context.Context, searchterm string) (username string, err error) {
user := auth.GetUser(ctx)
if user == nil || user.HasAnyRole([]string{auth.RoleAdmin, auth.RoleSupport}) { if user == nil || user.HasAnyRole([]string{auth.RoleAdmin, auth.RoleSupport}) {
err := sq.Select("job.user").Distinct().From("job"). err := sq.Select("job.user").Distinct().From("job").
Where("job.user = ?", searchterm). Where("job.user = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&username) RunWith(r.stmtCache).QueryRow().Scan(&username)
if err != nil && err != sql.ErrNoRows { if err != nil && err != sql.ErrNoRows {
return 0, "", err return "", err
} else if err == nil { } else if err == nil {
return 0, username, nil return username, nil
} }
} return "", ErrNotFound
return 0, "", ErrNotFound } else {
log.Infof("Non-Admin User %s : Requested Query Username -> %s: Forbidden", user.Name, searchterm)
return "", ErrForbidden
}
}
func (r *JobRepository) FindUserByName(ctx context.Context, searchterm string) (username string, err error) {
user := auth.GetUser(ctx)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
err := sq.Select("user.username").Distinct().From("user").
Where("user.name = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&username)
if err != nil && err != sql.ErrNoRows {
return "", err
} else if err == nil {
return username, nil
}
return "", ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query Name -> %s: Forbidden", user.Name, searchterm)
return "", ErrForbidden
}
}
func (r *JobRepository) FindUsers(ctx context.Context, searchterm string) (usernames []string, err error) {
user := auth.GetUser(ctx)
emptyResult := make([]string, 0)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
rows, err := sq.Select("job.user").Distinct().From("job").
Where("job.user LIKE ?", fmt.Sprint("%", searchterm, "%")).
RunWith(r.stmtCache).Query()
if err != nil && err != sql.ErrNoRows {
return emptyResult, err
} else if err == nil {
for rows.Next() {
var name string
err := rows.Scan(&name)
if err != nil {
rows.Close()
log.Warnf("Error while scanning rows: %v", err)
return emptyResult, err
}
usernames = append(usernames, name)
}
return usernames, nil
}
return emptyResult, ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query Usernames -> %s: Forbidden", user.Name, searchterm)
return emptyResult, ErrForbidden
}
}
func (r *JobRepository) FindUsersByName(ctx context.Context, searchterm string) (usernames []string, err error) {
user := auth.GetUser(ctx)
emptyResult := make([]string, 0)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
rows, err := sq.Select("user.username").Distinct().From("user").
Where("user.name LIKE ?", fmt.Sprint("%", searchterm, "%")).
RunWith(r.stmtCache).Query()
if err != nil && err != sql.ErrNoRows {
return emptyResult, err
} else if err == nil {
for rows.Next() {
var username string
err := rows.Scan(&username)
if err != nil {
rows.Close()
log.Warnf("Error while scanning rows: %v", err)
return emptyResult, err
}
usernames = append(usernames, username)
}
return usernames, nil
}
return emptyResult, ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query name -> %s: Forbidden", user.Name, searchterm)
return emptyResult, ErrForbidden
}
}
func (r *JobRepository) FindNameByUser(ctx context.Context, searchterm string) (name string, err error) {
user := auth.GetUser(ctx)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
err := sq.Select("user.name").Distinct().From("user").
Where("user.username = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&name)
if err != nil && err != sql.ErrNoRows {
return "", err
} else if err == nil {
return name, nil
}
return "", ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query Name -> %s: Forbidden", user.Name, searchterm)
return "", ErrForbidden
}
}
func (r *JobRepository) FindProject(ctx context.Context, searchterm string) (project string, err error) {
user := auth.GetUser(ctx)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
err := sq.Select("job.project").Distinct().From("job").
Where("job.project = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&project)
if err != nil && err != sql.ErrNoRows {
return "", err
} else if err == nil {
return project, nil
}
return "", ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query Project -> %s: Forbidden", user.Name, project)
return "", ErrForbidden
}
} }
func (r *JobRepository) Partitions(cluster string) ([]string, error) { func (r *JobRepository) Partitions(cluster string) ([]string, error) {
var err error var err error
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) { partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
parts := []string{} parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil { if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
@ -466,18 +694,22 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
log.Infof("Timer Partitions %s", time.Since(start))
return partitions.([]string), nil return partitions.([]string), nil
} }
// AllocatedNodes returns a map of all subclusters to a map of hostnames to the amount of jobs running on that host. // AllocatedNodes returns a map of all subclusters to a map of hostnames to the amount of jobs running on that host.
// Hosts with zero jobs running on them will not show up! // Hosts with zero jobs running on them will not show up!
func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) { func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) {
start := time.Now()
subclusters := make(map[string]map[string]int) subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job"). rows, err := sq.Select("resources", "subcluster").From("job").
Where("job.job_state = 'running'"). Where("job.job_state = 'running'").
Where("job.cluster = ?", cluster). Where("job.cluster = ?", cluster).
RunWith(r.stmtCache).Query() RunWith(r.stmtCache).Query()
if err != nil { if err != nil {
log.Error("Error while running query")
return nil, err return nil, err
} }
@ -488,9 +720,11 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
var resources []*schema.Resource var resources []*schema.Resource
var subcluster string var subcluster string
if err := rows.Scan(&raw, &subcluster); err != nil { if err := rows.Scan(&raw, &subcluster); err != nil {
log.Warn("Error while scanning rows")
return nil, err return nil, err
} }
if err := json.Unmarshal(raw, &resources); err != nil { if err := json.Unmarshal(raw, &resources); err != nil {
log.Warn("Error while unmarshaling raw resources json")
return nil, err return nil, err
} }
@ -505,10 +739,13 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
} }
} }
log.Infof("Timer AllocatedNodes %s", time.Since(start))
return subclusters, nil return subclusters, nil
} }
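For illustration, a minimal sketch of how the nested map returned by AllocatedNodes can be consumed; the helper name and the way the *JobRepository is obtained are assumptions, not part of this change:

// Illustrative helper (not part of this change): print running jobs per host,
// using the subcluster -> hostname -> job count structure documented above.
func printAllocatedNodes(r *JobRepository, cluster string) error {
	subclusters, err := r.AllocatedNodes(cluster)
	if err != nil {
		return err
	}
	for subcluster, hosts := range subclusters {
		for hostname, jobs := range hosts {
			fmt.Printf("%s/%s: %d running job(s)\n", subcluster, hostname, jobs)
		}
	}
	return nil
}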
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error { func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now()
res, err := sq.Update("job"). res, err := sq.Update("job").
Set("monitoring_status", schema.MonitoringStatusArchivingFailed). Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
Set("duration", 0). Set("duration", 0).
@ -518,16 +755,243 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)). Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)).
RunWith(r.DB).Exec() RunWith(r.DB).Exec()
if err != nil { if err != nil {
log.Warn("Error while stopping jobs exceeding walltime")
return err return err
} }
rowsAffected, err := res.RowsAffected() rowsAffected, err := res.RowsAffected()
if err != nil { if err != nil {
log.Warn("Error while fetching affected rows after stopping due to exceeded walltime")
return err return err
} }
if rowsAffected > 0 { if rowsAffected > 0 {
log.Warnf("%d jobs have been marked as failed due to running too long", rowsAffected) log.Infof("%d jobs have been marked as failed due to running too long", rowsAffected)
} }
log.Infof("Timer StopJobsExceedingWalltimeBy %s", time.Since(start))
return nil return nil
} }
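A hedged sketch of how this cleanup could be driven periodically; the interval and grace period below are illustrative values, and the actual scheduling is not part of this file:

// Illustrative only: periodically mark jobs that exceeded their requested walltime.
func runWalltimeChecker(r *JobRepository) {
	for range time.Tick(5 * time.Minute) { // interval is a made-up example value
		if err := r.StopJobsExceedingWalltimeBy(10); err != nil { // 10s grace, illustrative
			log.Errorf("error while stopping jobs exceeding their walltime: %s", err.Error())
		}
	}
}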
// TODO: Move to config
const ShortJobDuration int = 5 * 60
// GraphQL validation should make sure that no unknown values can be specified.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
}
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
func (r *JobRepository) JobsStatistics(ctx context.Context,
filter []*model.JobFilter,
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
start := time.Now()
// If `groupBy` is nil (not used), the aggregated model.JobsStatistics is stored under the key '' (empty string)
stats := map[string]*model.JobsStatistics{}
var castType string
if r.driver == "sqlite3" {
castType = "int"
} else if r.driver == "mysql" {
castType = "unsigned"
}
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
for _, cluster := range archive.Clusters {
for _, subcluster := range cluster.SubClusters {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as %s)", subcluster.SocketsPerNode, subcluster.CoresPerSocket, castType)
var rawQuery sq.SelectBuilder
if groupBy == nil {
rawQuery = sq.Select(
"''",
"COUNT(job.id)",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
corehoursCol,
).From("job")
} else {
col := groupBy2column[*groupBy]
rawQuery = sq.Select(
col,
"COUNT(job.id)",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
corehoursCol,
).From("job").GroupBy(col)
}
rawQuery = rawQuery.
Where("job.cluster = ?", cluster.Name).
Where("job.subcluster = ?", subcluster.Name)
query, qerr := SecurityCheck(ctx, rawQuery)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
log.Warn("Error while querying DB for job statistics")
return nil, err
}
for rows.Next() {
var id sql.NullString
var jobs, walltime, corehours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
if s, ok := stats[id.String]; ok {
s.TotalJobs += int(jobs.Int64)
s.TotalWalltime += int(walltime.Int64)
s.TotalCoreHours += int(corehours.Int64)
} else {
stats[id.String] = &model.JobsStatistics{
ID: id.String,
TotalJobs: int(jobs.Int64),
TotalWalltime: int(walltime.Int64),
TotalCoreHours: int(corehours.Int64),
}
}
}
}
}
}
if groupBy == nil {
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
query, qerr := SecurityCheck(ctx, query)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = BuildWhereClause(f, query)
}
if err := query.RunWith(r.DB).QueryRow().Scan(&(stats[""].ShortJobs)); err != nil {
log.Warn("Error while scanning rows for short job stats")
return nil, err
}
} else {
col := groupBy2column[*groupBy]
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
query, qerr := SecurityCheck(ctx, query)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
log.Warn("Error while querying jobs for short jobs")
return nil, err
}
for rows.Next() {
var id sql.NullString
var shortJobs sql.NullInt64
if err := rows.Scan(&id, &shortJobs); err != nil {
log.Warn("Error while scanning rows for short jobs")
return nil, err
}
if id.Valid {
stats[id.String].ShortJobs = int(shortJobs.Int64)
}
}
}
// Calculating the histogram data is expensive, so only do it if needed.
// An explicit resolver cannot be used because we need to know the filters.
histogramsNeeded := false
fields := graphql.CollectFieldsCtx(ctx, nil)
for _, col := range fields {
if col.Name == "histDuration" || col.Name == "histNumNodes" {
histogramsNeeded = true
}
}
res := make([]*model.JobsStatistics, 0, len(stats))
for _, stat := range stats {
res = append(res, stat)
id, col := "", ""
if groupBy != nil {
id = stat.ID
col = groupBy2column[*groupBy]
}
if histogramsNeeded {
var err error
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter, id, col)
if err != nil {
log.Warn("Error while loading job statistics histogram: running jobs")
return nil, err
}
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
if err != nil {
log.Warn("Error while loading job statistics histogram: num nodes")
return nil, err
}
}
}
log.Infof("Timer JobStatistics %s", time.Since(start))
return res, nil
}
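A hedged sketch of how a resolver in schema.resolvers.go might delegate to this helper; the function below is illustrative and not part of the generated resolver code:

// Illustrative caller (not part of this change): aggregate per-user statistics.
func statsPerUserExample(ctx context.Context, r *JobRepository, filter []*model.JobFilter) error {
	groupBy := model.AggregateUser
	stats, err := r.JobsStatistics(ctx, filter, &groupBy)
	if err != nil {
		return err
	}
	for _, s := range stats {
		fmt.Printf("%s: %d jobs, %d core hours\n", s.ID, s.TotalJobs, s.TotalCoreHours)
	}
	return nil
}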
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
// to add a condition to the query of the kind "<col> = <id>".
func (r *JobRepository) jobsStatisticsHistogram(ctx context.Context,
value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
start := time.Now()
query := sq.Select(value, "COUNT(job.id) AS count").From("job")
query, qerr := SecurityCheck(ctx, sq.Select(value, "COUNT(job.id) AS count").From("job"))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
query = BuildWhereClause(f, query)
}
if len(id) != 0 && len(col) != 0 {
query = query.Where(col+" = ?", id)
}
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
points := make([]*model.HistoPoint, 0)
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
points = append(points, &point)
}
log.Infof("Timer jobsStatisticsHistogram %s", time.Since(start))
return points, nil
}
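Illustrative only: restricting the histogram to a single user by passing `id` and `col`, following the calling convention documented above (the cast type is hard-coded for sqlite3 in this sketch):

// Illustrative caller (not part of this change): duration histogram, in hours,
// for a single user; "job.user" matches the groupBy2column mapping above.
func userDurationHistogram(ctx context.Context, r *JobRepository,
	filters []*model.JobFilter, username string) ([]*model.HistoPoint, error) {
	value := "CAST(ROUND(job.duration / 3600) as int) as value" // sqlite3 cast type assumed
	return r.jobsStatisticsHistogram(ctx, value, filters, username, "job.user")
}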

View File

@ -8,10 +8,12 @@ import (
"fmt" "fmt"
"testing" "testing"
"github.com/ClusterCockpit/cc-backend/pkg/log"
_ "github.com/mattn/go-sqlite3" _ "github.com/mattn/go-sqlite3"
) )
func init() { func init() {
log.Init("info", true)
Connect("sqlite3", "../../test/test.db") Connect("sqlite3", "../../test/test.db")
} }

View File

@ -0,0 +1,109 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"database/sql"
"embed"
"fmt"
"os"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/golang-migrate/migrate/v4"
"github.com/golang-migrate/migrate/v4/database/mysql"
"github.com/golang-migrate/migrate/v4/database/sqlite3"
"github.com/golang-migrate/migrate/v4/source/iofs"
)
const supportedVersion uint = 2
//go:embed migrations/*
var migrationFiles embed.FS
func checkDBVersion(backend string, db *sql.DB) {
var m *migrate.Migrate
if backend == "sqlite3" {
driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
if err != nil {
log.Fatal(err)
}
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithInstance("iofs", d, "sqlite3", driver)
if err != nil {
log.Fatal(err)
}
} else if backend == "mysql" {
driver, err := mysql.WithInstance(db, &mysql.Config{})
if err != nil {
log.Fatal(err)
}
d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithInstance("iofs", d, "mysql", driver)
if err != nil {
log.Fatal(err)
}
}
v, _, err := m.Version()
if err != nil {
if err == migrate.ErrNilVersion {
log.Warn("Legacy database without version or missing database file!")
} else {
log.Fatal(err)
}
}
if v < supportedVersion {
log.Warnf("Unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend --migrate-db", v, supportedVersion)
os.Exit(0)
}
if v > supportedVersion {
log.Warnf("Unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool!", v, supportedVersion)
os.Exit(0)
}
}
func MigrateDB(backend string, db string) {
var m *migrate.Migrate
if backend == "sqlite3" {
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
if err != nil {
log.Fatal(err)
}
} else if backend == "mysql" {
d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
if err != nil {
log.Fatal(err)
}
}
if err := m.Up(); err != nil {
log.Fatal(err)
}
m.Close()
}
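A hedged sketch of how the `--migrate-db` switch referenced in the warning above could be wired up in cmd/cc-backend; the flag handling and the default database path are assumptions made for illustration only:

// Illustrative wiring (not part of this file); the real startup code lives in cmd/cc-backend.
package main

import (
	"flag"

	"github.com/ClusterCockpit/cc-backend/internal/repository"
)

func main() {
	// Flag names and the default path below are assumptions made for this sketch.
	migrateDB := flag.Bool("migrate-db", false, "Migrate database schema to the supported version, then exit")
	dbPath := flag.String("db", "./var/job.db", "SQLite database file")
	flag.Parse()

	if *migrateDB {
		repository.MigrateDB("sqlite3", *dbPath)
		return
	}
	// ... regular server startup would continue here ...
}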

View File

@ -0,0 +1,5 @@
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tags;
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS configuration;
DROP TABLE IF EXISTS user;

View File

@ -0,0 +1,62 @@
CREATE TABLE IF NOT EXISTS job (
id INTEGER AUTO_INCREMENT PRIMARY KEY ,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
`partition` VARCHAR(255) NOT NULL,
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
CREATE TABLE IF NOT EXISTS tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
CREATE TABLE IF NOT EXISTS jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
CREATE TABLE IF NOT EXISTS user (
username varchar(255) PRIMARY KEY NOT NULL,
password varchar(255) DEFAULT NULL,
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
name varchar(255) DEFAULT NULL,
roles varchar(255) NOT NULL DEFAULT "[]",
email varchar(255) DEFAULT NULL);

View File

@ -0,0 +1,5 @@
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_by_state;

View File

@ -0,0 +1,5 @@
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
CREATE INDEX IF NOT EXISTS job_by_state ON job (job_state);

View File

@ -0,0 +1,5 @@
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tags;
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS configuration;
DROP TABLE IF EXISTS user;

View File

@ -0,0 +1,62 @@
CREATE TABLE IF NOT EXISTS job (
id INTEGER PRIMARY KEY,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
partition VARCHAR(255) NOT NULL,
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
CREATE TABLE IF NOT EXISTS tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
CREATE TABLE IF NOT EXISTS jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
CREATE TABLE IF NOT EXISTS user (
username varchar(255) PRIMARY KEY NOT NULL,
password varchar(255) DEFAULT NULL,
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
name varchar(255) DEFAULT NULL,
roles varchar(255) NOT NULL DEFAULT "[]",
email varchar(255) DEFAULT NULL);

View File

@ -0,0 +1,5 @@
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_by_state;

View File

@ -0,0 +1,5 @@
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
CREATE INDEX IF NOT EXISTS job_by_state ON job (job_state);

View File

@ -39,7 +39,7 @@ func (r *JobRepository) QueryJobs(
} else if order.Order == model.SortDirectionEnumDesc { } else if order.Order == model.SortDirectionEnumDesc {
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field)) query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
} else { } else {
return nil, errors.New("invalid sorting order") return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
} }
} }
@ -54,12 +54,14 @@ func (r *JobRepository) QueryJobs(
sql, args, err := query.ToSql() sql, args, err := query.ToSql()
if err != nil { if err != nil {
log.Warn("Error while converting query to sql")
return nil, err return nil, err
} }
log.Debugf("SQL query: `%s`, args: %#v", sql, args) log.Debugf("SQL query: `%s`, args: %#v", sql, args)
rows, err := query.RunWith(r.stmtCache).Query() rows, err := query.RunWith(r.stmtCache).Query()
if err != nil { if err != nil {
log.Error("Error while running query")
return nil, err return nil, err
} }
@ -68,6 +70,7 @@ func (r *JobRepository) QueryJobs(
job, err := scanJob(rows) job, err := scanJob(rows)
if err != nil { if err != nil {
rows.Close() rows.Close()
log.Warn("Error while scanning rows")
return nil, err return nil, err
} }
jobs = append(jobs, job) jobs = append(jobs, job)
@ -135,6 +138,9 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
if filter.Project != nil { if filter.Project != nil {
query = buildStringCondition("job.project", filter.Project, query) query = buildStringCondition("job.project", filter.Project, query)
} }
if filter.JobName != nil {
query = buildStringCondition("job.meta_data", filter.JobName, query)
}
if filter.Cluster != nil { if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query) query = buildStringCondition("job.cluster", filter.Cluster, query)
} }
@ -217,6 +223,13 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
if cond.Contains != nil { if cond.Contains != nil {
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%")) return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
} }
if cond.In != nil {
queryUsers := make([]string, len(cond.In))
for i, val := range cond.In {
queryUsers[i] = val
}
return query.Where(sq.Or{sq.Eq{"job.user": queryUsers}})
}
return query return query
} }
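For illustration, a filter using the new `in` operator; note that, as implemented above, the `in` case always matches against job.user regardless of the field the condition was built for. The usernames are made up:

// Illustrative only: select jobs belonging to any of several users.
func usersInFilterExample() *model.JobFilter {
	return &model.JobFilter{
		User: &model.StringInput{
			In: []string{"hpcuser1", "hpcuser2"}, // hypothetical usernames
		},
	}
}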
@ -226,7 +239,7 @@ var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
func toSnakeCase(str string) string { func toSnakeCase(str string) string {
for _, c := range str { for _, c := range str {
if c == '\'' || c == '\\' { if c == '\'' || c == '\\' {
panic("A hacker (probably not)!!!") log.Panic("toSnakeCase() attack vector!")
} }
} }

View File

@ -9,22 +9,26 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
sq "github.com/Masterminds/squirrel" sq "github.com/Masterminds/squirrel"
) )
// Add the tag with id `tagId` to the job with the database id `jobId`. // Add the tag with id `tagId` to the job with the database id `jobId`.
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) { func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
if _, err := r.stmtCache.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES ($1, $2)`, job, tag); err != nil { if _, err := r.stmtCache.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES ($1, $2)`, job, tag); err != nil {
log.Error("Error while running query")
return nil, err return nil, err
} }
j, err := r.FindById(job) j, err := r.FindById(job)
if err != nil { if err != nil {
log.Warn("Error while finding job by id")
return nil, err return nil, err
} }
tags, err := r.GetTags(&job) tags, err := r.GetTags(&job)
if err != nil { if err != nil {
log.Warn("Error while getting tags for job")
return nil, err return nil, err
} }
@ -34,16 +38,19 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
// Removes a tag from a job // Removes a tag from a job
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) { func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
if _, err := r.stmtCache.Exec("DELETE FROM jobtag WHERE jobtag.job_id = $1 AND jobtag.tag_id = $2", job, tag); err != nil { if _, err := r.stmtCache.Exec("DELETE FROM jobtag WHERE jobtag.job_id = $1 AND jobtag.tag_id = $2", job, tag); err != nil {
log.Error("Error while running query")
return nil, err return nil, err
} }
j, err := r.FindById(job) j, err := r.FindById(job)
if err != nil { if err != nil {
log.Warn("Error while finding job by id")
return nil, err return nil, err
} }
tags, err := r.GetTags(&job) tags, err := r.GetTags(&job)
if err != nil { if err != nil {
log.Warn("Error while getting tags for job")
return nil, err return nil, err
} }
@ -144,6 +151,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
rows, err := q.RunWith(r.stmtCache).Query() rows, err := q.RunWith(r.stmtCache).Query()
if err != nil { if err != nil {
log.Error("Error while running query")
return nil, err return nil, err
} }
@ -151,6 +159,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
for rows.Next() { for rows.Next() {
tag := &schema.Tag{} tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil { if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
log.Warn("Error while scanning rows")
return nil, err return nil, err
} }
tags = append(tags, tag) tags = append(tags, tag)

View File

@ -6,13 +6,13 @@ package repository
import ( import (
"encoding/json" "encoding/json"
"log"
"sync" "sync"
"time" "time"
"github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache" "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/jmoiron/sqlx" "github.com/jmoiron/sqlx"
) )
@ -33,21 +33,9 @@ func GetUserCfgRepo() *UserCfgRepo {
userCfgRepoOnce.Do(func() { userCfgRepoOnce.Do(func() {
db := GetConnection() db := GetConnection()
_, err := db.DB.Exec(`
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
if err != nil {
log.Fatal(err)
}
lookupConfigStmt, err := db.DB.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`) lookupConfigStmt, err := db.DB.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
if err != nil { if err != nil {
log.Fatal(err) log.Fatalf("db.DB.Preparex() error: %v", err)
} }
userCfgRepoInstance = &UserCfgRepo{ userCfgRepoInstance = &UserCfgRepo{
@ -82,6 +70,7 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
rows, err := uCfg.Lookup.Query(user.Username) rows, err := uCfg.Lookup.Query(user.Username)
if err != nil { if err != nil {
log.Warnf("Error while looking up user config for user '%v'", user.Username)
return err, 0, 0 return err, 0, 0
} }
@ -90,11 +79,13 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
for rows.Next() { for rows.Next() {
var key, rawval string var key, rawval string
if err := rows.Scan(&key, &rawval); err != nil { if err := rows.Scan(&key, &rawval); err != nil {
log.Warn("Error while scanning user config values")
return err, 0, 0 return err, 0, 0
} }
var val interface{} var val interface{}
if err := json.Unmarshal([]byte(rawval), &val); err != nil { if err := json.Unmarshal([]byte(rawval), &val); err != nil {
log.Warn("Error while unmarshaling raw user config json")
return err, 0, 0 return err, 0, 0
} }
@ -106,6 +97,7 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
return config, 24 * time.Hour, size return config, 24 * time.Hour, size
}) })
if err, ok := data.(error); ok { if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err return nil, err
} }
@ -122,6 +114,7 @@ func (uCfg *UserCfgRepo) UpdateConfig(
if user == nil { if user == nil {
var val interface{} var val interface{}
if err := json.Unmarshal([]byte(value), &val); err != nil { if err := json.Unmarshal([]byte(value), &val); err != nil {
log.Warn("Error while unmarshaling raw user config json")
return err return err
} }
@ -131,8 +124,8 @@ func (uCfg *UserCfgRepo) UpdateConfig(
return nil return nil
} }
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`, if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`, user, key, value); err != nil {
user.Username, key, value); err != nil { log.Warnf("Error while replacing user config in DB for user '%v'", user)
return err return err
} }

View File

@ -12,8 +12,8 @@ import (
"strings" "strings"
"time" "time"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
@ -44,7 +44,7 @@ var routes []Route = []Route{
{"/monitoring/user/{id}", "monitoring/user.tmpl", "User <ID> - ClusterCockpit", true, setupUserRoute}, {"/monitoring/user/{id}", "monitoring/user.tmpl", "User <ID> - ClusterCockpit", true, setupUserRoute},
{"/monitoring/systems/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> - ClusterCockpit", false, setupClusterRoute}, {"/monitoring/systems/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> - ClusterCockpit", false, setupClusterRoute},
{"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute}, {"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute},
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analaysis - ClusterCockpit", true, setupAnalysisRoute}, {"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analysis - ClusterCockpit", true, setupAnalysisRoute},
{"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterRoute}, {"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterRoute},
} }
@ -61,21 +61,21 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
State: []schema.JobState{schema.JobStateRunning}, State: []schema.JobState{schema.JobStateRunning},
}}, nil, nil) }}, nil, nil)
if err != nil { if err != nil {
log.Errorf("failed to count jobs: %s", err.Error()) log.Warnf("failed to count jobs: %s", err.Error())
runningJobs = map[string]int{} runningJobs = map[string]int{}
} }
totalJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, nil, nil, nil) totalJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, nil, nil, nil)
if err != nil { if err != nil {
log.Errorf("failed to count jobs: %s", err.Error()) log.Warnf("failed to count jobs: %s", err.Error())
totalJobs = map[string]int{} totalJobs = map[string]int{}
} }
from := time.Now().Add(-24 * time.Hour) from := time.Now().Add(-24 * time.Hour)
recentShortJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{ recentShortJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{
StartTime: &schema.TimeRange{From: &from, To: nil}, StartTime: &schema.TimeRange{From: &from, To: nil},
Duration: &schema.IntRange{From: 0, To: graph.ShortJobDuration}, Duration: &schema.IntRange{From: 0, To: repository.ShortJobDuration},
}}, nil, nil) }}, nil, nil)
if err != nil { if err != nil {
log.Errorf("failed to count jobs: %s", err.Error()) log.Warnf("failed to count jobs: %s", err.Error())
recentShortJobs = map[string]int{} recentShortJobs = map[string]int{}
} }
@ -158,7 +158,7 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
tags, counts, err := jobRepo.CountTags(username, projects) tags, counts, err := jobRepo.CountTags(username, projects)
tagMap := make(map[string][]map[string]interface{}) tagMap := make(map[string][]map[string]interface{})
if err != nil { if err != nil {
log.Errorf("GetTags failed: %s", err.Error()) log.Warnf("GetTags failed: %s", err.Error())
i["tagmap"] = tagMap i["tagmap"] = tagMap
return i return i
} }
@ -188,9 +188,17 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
filterPresets["project"] = query.Get("project") filterPresets["project"] = query.Get("project")
filterPresets["projectMatch"] = "eq" filterPresets["projectMatch"] = "eq"
} }
if query.Get("user") != "" { if query.Get("jobName") != "" {
filterPresets["user"] = query.Get("user") filterPresets["jobName"] = query.Get("jobName")
filterPresets["userMatch"] = "eq" }
if len(query["user"]) != 0 {
if len(query["user"]) == 1 {
filterPresets["user"] = query.Get("user")
filterPresets["userMatch"] = "contains"
} else {
filterPresets["user"] = query["user"]
filterPresets["userMatch"] = "in"
}
} }
if len(query["state"]) != 0 { if len(query["state"]) != 0 {
filterPresets["state"] = query["state"] filterPresets["state"] = query["state"]
@ -301,3 +309,81 @@ func SetupRoutes(router *mux.Router, version string, hash string, buildTime stri
}) })
} }
} }
func HandleSearchBar(rw http.ResponseWriter, r *http.Request, api *api.RestApi) {
if search := r.URL.Query().Get("searchId"); search != "" {
splitSearch := strings.Split(search, ":")
if len(splitSearch) == 2 {
switch strings.Trim(splitSearch[0], " ") {
case "jobId":
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // All Users: Redirect to Tablequery
return
case "jobName":
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // All Users: Redirect to Tablequery
return
case "projectId":
project, _ := api.JobRepository.FindProject(r.Context(), strings.Trim(splitSearch[1], " ")) // Restricted: projectId
if project != "" {
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(project), http.StatusTemporaryRedirect)
return
} else {
http.Redirect(rw, r, "/monitoring/jobs/?jobId=NotFound", http.StatusTemporaryRedirect) // Workaround to display correctly empty table
}
case "username":
usernames, _ := api.JobRepository.FindUsers(r.Context(), strings.Trim(splitSearch[1], " ")) // Restricted: usernames
if len(usernames) == 1 {
http.Redirect(rw, r, "/monitoring/user/"+usernames[0], http.StatusTemporaryRedirect) // One Match: Redirect to User View
return
} else if len(usernames) > 1 {
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // > 1 Matches: Redirect to user table
return
} else {
http.Redirect(rw, r, "/monitoring/users/?user=NotFound", http.StatusTemporaryRedirect) // Workaround to display correctly empty table
}
case "name":
usernames, _ := api.JobRepository.FindUsersByName(r.Context(), strings.Trim(splitSearch[1], " ")) // Restricted: usernames queried by name
if len(usernames) == 1 {
http.Redirect(rw, r, "/monitoring/user/"+usernames[0], http.StatusTemporaryRedirect)
return
} else if len(usernames) > 1 {
joinedNames := strings.Join(usernames, "&user=")
http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames, http.StatusTemporaryRedirect) // > 1 Matches: Redirect to user table
return
} else {
http.Redirect(rw, r, "/monitoring/users/?user=NotFound", http.StatusTemporaryRedirect) // Workaround to display correctly empty table
}
default:
http.Error(rw, "'searchId' type parameter unknown", http.StatusBadRequest)
}
} else if len(splitSearch) == 1 {
jobname, username, project, err := api.JobRepository.FindJobnameOrUserOrProject(r.Context(), strings.Trim(search, " ")) // Access is checked within the repository call
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
if username != "" {
http.Redirect(rw, r, "/monitoring/user/"+username, http.StatusTemporaryRedirect) // User: Redirect to user page
return
} else if project != "" {
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // projectId (equal)
return
} else if jobname != "" {
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // JobName (contains)
return
} else {
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // No Result: Probably jobId
return
}
} else {
http.Error(rw, "'searchId' query parameter malformed", http.StatusBadRequest)
}
} else {
http.Redirect(rw, r, "/monitoring/jobs/?", http.StatusTemporaryRedirect)
}
}
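The handler above accepts either a bare term or a typed "<key>:<term>" search string; a few hedged examples of the resulting redirects (all identifiers are made up):

// Illustrative search strings and where the handler above redirects them:
//
//	jobId:123456     -> /monitoring/jobs/?jobId=123456
//	jobName:lammps   -> /monitoring/jobs/?jobName=lammps
//	projectId:abcd42 -> /monitoring/jobs/?projectMatch=eq&project=abcd42 (if the project exists)
//	username:hpcuser -> /monitoring/user/hpcuser (single match) or the users table (several matches)
//	name:Jane        -> user view for a single matching display name, users table otherwise
//	123456           -> bare term: tried as username, then project, then job name, then jobId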

View File

@ -14,6 +14,8 @@ import (
"strconv" "strconv"
"strings" "strings"
"syscall" "syscall"
"github.com/ClusterCockpit/cc-backend/pkg/log"
) )
// Very simple and limited .env file reader. // Very simple and limited .env file reader.
@ -22,6 +24,7 @@ import (
func LoadEnv(file string) error { func LoadEnv(file string) error {
f, err := os.Open(file) f, err := os.Open(file)
if err != nil { if err != nil {
log.Error("Error while opening file")
return err return err
} }
@ -40,14 +43,14 @@ func LoadEnv(file string) error {
line = strings.TrimPrefix(line, "export ") line = strings.TrimPrefix(line, "export ")
parts := strings.SplitN(line, "=", 2) parts := strings.SplitN(line, "=", 2)
if len(parts) != 2 { if len(parts) != 2 {
return fmt.Errorf("unsupported line: %#v", line) return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
} }
key := strings.TrimSpace(parts[0]) key := strings.TrimSpace(parts[0])
val := strings.TrimSpace(parts[1]) val := strings.TrimSpace(parts[1])
if strings.HasPrefix(val, "\"") { if strings.HasPrefix(val, "\"") {
if !strings.HasSuffix(val, "\"") { if !strings.HasSuffix(val, "\"") {
return fmt.Errorf("unsupported line: %#v", line) return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
} }
runes := []rune(val[1 : len(val)-1]) runes := []rune(val[1 : len(val)-1])
@ -65,7 +68,7 @@ func LoadEnv(file string) error {
case '"': case '"':
sb.WriteRune('"') sb.WriteRune('"')
default: default:
return fmt.Errorf("unsupprorted escape sequence in quoted string: backslash %#v", runes[i]) return fmt.Errorf("RUNTIME/SETUP > unsupported escape sequence in quoted string: backslash %#v", runes[i])
} }
continue continue
} }
@ -89,11 +92,13 @@ func DropPrivileges(username string, group string) error {
if group != "" { if group != "" {
g, err := user.LookupGroup(group) g, err := user.LookupGroup(group)
if err != nil { if err != nil {
log.Warn("Error while looking up group")
return err return err
} }
gid, _ := strconv.Atoi(g.Gid) gid, _ := strconv.Atoi(g.Gid)
if err := syscall.Setgid(gid); err != nil { if err := syscall.Setgid(gid); err != nil {
log.Warn("Error while setting gid")
return err return err
} }
} }
@ -101,11 +106,13 @@ func DropPrivileges(username string, group string) error {
if username != "" { if username != "" {
u, err := user.Lookup(username) u, err := user.Lookup(username)
if err != nil { if err != nil {
log.Warn("Error while looking up user")
return err return err
} }
uid, _ := strconv.Atoi(u.Uid) uid, _ := strconv.Atoi(u.Uid)
if err := syscall.Setuid(uid); err != nil { if err := syscall.Setuid(uid); err != nil {
log.Warn("Error while setting uid")
return err return err
} }
} }

View File

@ -10,6 +10,7 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/lrucache" "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
) )
type ArchiveBackend interface { type ArchiveBackend interface {
@ -40,6 +41,7 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
Kind string `json:"kind"` Kind string `json:"kind"`
} }
if err := json.Unmarshal(rawConfig, &kind); err != nil { if err := json.Unmarshal(rawConfig, &kind); err != nil {
log.Warn("Error while unmarshaling raw config json")
return err return err
} }
@ -49,10 +51,11 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
// case "s3": // case "s3":
// ar = &S3Archive{} // ar = &S3Archive{}
default: default:
return fmt.Errorf("unkown archive backend '%s''", kind.Kind) return fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", kind.Kind)
} }
if err := ar.Init(rawConfig); err != nil { if err := ar.Init(rawConfig); err != nil {
log.Error("Error while initializing archiveBackend")
return err return err
} }
return initClusterConfig() return initClusterConfig()
@ -70,6 +73,7 @@ func LoadAveragesFromArchive(
metaFile, err := ar.LoadJobMeta(job) metaFile, err := ar.LoadJobMeta(job)
if err != nil { if err != nil {
log.Warn("Error while loading job metadata from archiveBackend")
return err return err
} }
@ -88,6 +92,7 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
metaFile, err := ar.LoadJobMeta(job) metaFile, err := ar.LoadJobMeta(job)
if err != nil { if err != nil {
log.Warn("Error while loading job metadata from archiveBackend")
return nil, err return nil, err
} }
@ -104,6 +109,7 @@ func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
jobMeta, err := ar.LoadJobMeta(job) jobMeta, err := ar.LoadJobMeta(job)
if err != nil { if err != nil {
log.Warn("Error while loading job metadata from archiveBackend")
return err return err
} }

View File

@ -9,6 +9,7 @@ import (
"fmt" "fmt"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
) )
var Clusters []*schema.Cluster var Clusters []*schema.Cluster
@ -23,6 +24,7 @@ func initClusterConfig() error {
cluster, err := ar.LoadClusterCfg(c) cluster, err := ar.LoadClusterCfg(c)
if err != nil { if err != nil {
log.Warnf("Error while loading cluster config for cluster '%v'", c)
return err return err
} }
@ -59,7 +61,7 @@ func initClusterConfig() error {
nl, err := ParseNodeList(sc.Nodes) nl, err := ParseNodeList(sc.Nodes)
if err != nil { if err != nil {
return fmt.Errorf("in %s/cluster.json: %w", cluster.Name, err) return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
} }
nodeLists[cluster.Name][sc.Name] = nl nodeLists[cluster.Name][sc.Name] = nl
} }
@ -112,7 +114,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
cluster := GetCluster(job.Cluster) cluster := GetCluster(job.Cluster)
if cluster == nil { if cluster == nil {
return fmt.Errorf("unkown cluster: %#v", job.Cluster) return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
} }
if job.SubCluster != "" { if job.SubCluster != "" {
@ -121,11 +123,11 @@ func AssignSubCluster(job *schema.BaseJob) error {
return nil return nil
} }
} }
return fmt.Errorf("already assigned subcluster %#v unkown (cluster: %#v)", job.SubCluster, job.Cluster) return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > already assigned subcluster %v unkown (cluster: %v)", job.SubCluster, job.Cluster)
} }
if len(job.Resources) == 0 { if len(job.Resources) == 0 {
return fmt.Errorf("job without any resources/hosts") return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > job without any resources/hosts")
} }
host0 := job.Resources[0].Hostname host0 := job.Resources[0].Hostname
@ -141,7 +143,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
return nil return nil
} }
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0) return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", job.Cluster, host0)
} }
func GetSubClusterByNode(cluster, hostname string) (string, error) { func GetSubClusterByNode(cluster, hostname string) (string, error) {
@ -154,12 +156,12 @@ func GetSubClusterByNode(cluster, hostname string) (string, error) {
c := GetCluster(cluster) c := GetCluster(cluster)
if c == nil { if c == nil {
return "", fmt.Errorf("unkown cluster: %#v", cluster) return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", cluster)
} }
if c.SubClusters[0].Nodes == "" { if c.SubClusters[0].Nodes == "" {
return c.SubClusters[0].Name, nil return c.SubClusters[0].Name, nil
} }
return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname) return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", cluster, hostname)
} }

View File

@ -46,7 +46,7 @@ func loadJobMeta(filename string) (*schema.JobMeta, error) {
f, err := os.Open(filename) f, err := os.Open(filename)
if err != nil { if err != nil {
log.Errorf("fsBackend loadJobMeta()- %v", err) log.Errorf("loadJobMeta() > open file error: %v", err)
return &schema.JobMeta{}, err return &schema.JobMeta{}, err
} }
defer f.Close() defer f.Close()
@ -58,19 +58,19 @@ func (fsa *FsArchive) Init(rawConfig json.RawMessage) error {
var config FsArchiveConfig var config FsArchiveConfig
if err := json.Unmarshal(rawConfig, &config); err != nil { if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Errorf("fsBackend Init()- %v", err) log.Warnf("Init() > Unmarshal error: %#v", err)
return err return err
} }
if config.Path == "" { if config.Path == "" {
err := fmt.Errorf("fsBackend Init()- empty path") err := fmt.Errorf("Init() : empty config.Path")
log.Errorf("fsBackend Init()- %v", err) log.Errorf("Init() > config.Path error: %v", err)
return err return err
} }
fsa.path = config.Path fsa.path = config.Path
entries, err := os.ReadDir(fsa.path) entries, err := os.ReadDir(fsa.path)
if err != nil { if err != nil {
log.Errorf("fsBackend Init()- %v", err) log.Errorf("Init() > ReadDir() error: %v", err)
return err return err
} }
@ -86,7 +86,7 @@ func (fsa *FsArchive) LoadJobData(job *schema.Job) (schema.JobData, error) {
filename := getPath(job, fsa.path, "data.json") filename := getPath(job, fsa.path, "data.json")
f, err := os.Open(filename) f, err := os.Open(filename)
if err != nil { if err != nil {
log.Errorf("fsBackend LoadJobData()- %v", err) log.Errorf("LoadJobData() > open file error: %v", err)
return nil, err return nil, err
} }
defer f.Close() defer f.Close()
@ -104,11 +104,12 @@ func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json")) b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
if err != nil { if err != nil {
log.Errorf("fsBackend LoadClusterCfg()- %v", err) log.Errorf("LoadClusterCfg() > open file error: %v", err)
return &schema.Cluster{}, err return &schema.Cluster{}, err
} }
if config.Keys.Validate { if config.Keys.Validate {
if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(b)); err != nil { if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(b)); err != nil {
log.Warnf("Validate cluster config: %v\n", err)
return &schema.Cluster{}, fmt.Errorf("Validate cluster config: %v\n", err) return &schema.Cluster{}, fmt.Errorf("Validate cluster config: %v\n", err)
} }
} }
@ -121,13 +122,13 @@ func (fsa *FsArchive) Iter() <-chan *schema.JobMeta {
go func() { go func() {
clustersDir, err := os.ReadDir(fsa.path) clustersDir, err := os.ReadDir(fsa.path)
if err != nil { if err != nil {
log.Fatalf("Reading clusters failed: %s", err.Error()) log.Fatalf("Reading clusters failed @ cluster dirs: %s", err.Error())
} }
for _, clusterDir := range clustersDir { for _, clusterDir := range clustersDir {
lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name())) lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name()))
if err != nil { if err != nil {
log.Fatalf("Reading jobs failed: %s", err.Error()) log.Fatalf("Reading jobs failed @ lvl1 dirs: %s", err.Error())
} }
for _, lvl1Dir := range lvl1Dirs { for _, lvl1Dir := range lvl1Dirs {
@ -138,21 +139,21 @@ func (fsa *FsArchive) Iter() <-chan *schema.JobMeta {
lvl2Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name())) lvl2Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name()))
if err != nil { if err != nil {
log.Fatalf("Reading jobs failed: %s", err.Error()) log.Fatalf("Reading jobs failed @ lvl2 dirs: %s", err.Error())
} }
for _, lvl2Dir := range lvl2Dirs { for _, lvl2Dir := range lvl2Dirs {
dirpath := filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name()) dirpath := filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
startTimeDirs, err := os.ReadDir(dirpath) startTimeDirs, err := os.ReadDir(dirpath)
if err != nil { if err != nil {
log.Fatalf("Reading jobs failed: %s", err.Error()) log.Fatalf("Reading jobs failed @ starttime dirs: %s", err.Error())
} }
for _, startTimeDir := range startTimeDirs { for _, startTimeDir := range startTimeDirs {
if startTimeDir.IsDir() { if startTimeDir.IsDir() {
job, err := loadJobMeta(filepath.Join(dirpath, startTimeDir.Name(), "meta.json")) job, err := loadJobMeta(filepath.Join(dirpath, startTimeDir.Name(), "meta.json"))
if err != nil { if err != nil {
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error()) log.Errorf("error in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
} else { } else {
ch <- job ch <- job
} }
@ -175,12 +176,15 @@ func (fsa *FsArchive) StoreJobMeta(jobMeta *schema.JobMeta) error {
} }
f, err := os.Create(getPath(&job, fsa.path, "meta.json")) f, err := os.Create(getPath(&job, fsa.path, "meta.json"))
if err != nil { if err != nil {
log.Error("Error while creating filepath for meta.json")
return err return err
} }
if err := EncodeJobMeta(f, jobMeta); err != nil { if err := EncodeJobMeta(f, jobMeta); err != nil {
log.Error("Error while encoding job metadata to meta.json file")
return err return err
} }
if err := f.Close(); err != nil { if err := f.Close(); err != nil {
log.Warn("Error while closing meta.json file")
return err return err
} }
@ -203,26 +207,38 @@ func (fsa *FsArchive) ImportJob(
} }
dir := getPath(&job, fsa.path, "") dir := getPath(&job, fsa.path, "")
if err := os.MkdirAll(dir, 0777); err != nil { if err := os.MkdirAll(dir, 0777); err != nil {
log.Error("Error while creating job archive path")
return err return err
} }
f, err := os.Create(path.Join(dir, "meta.json")) f, err := os.Create(path.Join(dir, "meta.json"))
if err != nil { if err != nil {
log.Error("Error while creating filepath for meta.json")
return err return err
} }
if err := EncodeJobMeta(f, jobMeta); err != nil { if err := EncodeJobMeta(f, jobMeta); err != nil {
log.Error("Error while encoding job metadata to meta.json file")
return err return err
} }
if err := f.Close(); err != nil { if err := f.Close(); err != nil {
log.Warn("Error while closing meta.json file")
return err return err
} }
f, err = os.Create(path.Join(dir, "data.json")) f, err = os.Create(path.Join(dir, "data.json"))
if err != nil { if err != nil {
log.Error("Error while creating filepath for data.json")
return err return err
} }
if err := EncodeJobData(f, jobData); err != nil { if err := EncodeJobData(f, jobData); err != nil {
log.Error("Error while encoding job metricdata to data.json file")
return err return err
} }
return f.Close() if err := f.Close(); err != nil {
log.Warn("Error while closing data.json file")
return err
}
// no error: final return is nil
return nil
} }

View File

@ -10,9 +10,14 @@ import (
"testing" "testing"
"time" "time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
) )
func init() {
log.Init("info", true)
}
func TestInitEmptyPath(t *testing.T) { func TestInitEmptyPath(t *testing.T) {
var fsa FsArchive var fsa FsArchive
err := fsa.Init(json.RawMessage("{\"kind\":\"../../test/archive\"}")) err := fsa.Init(json.RawMessage("{\"kind\":\"../../test/archive\"}"))

View File

@ -10,12 +10,14 @@ import (
"time" "time"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
) )
func DecodeJobData(r io.Reader, k string) (schema.JobData, error) { func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
data := cache.Get(k, func() (value interface{}, ttl time.Duration, size int) { data := cache.Get(k, func() (value interface{}, ttl time.Duration, size int) {
var d schema.JobData var d schema.JobData
if err := json.NewDecoder(r).Decode(&d); err != nil { if err := json.NewDecoder(r).Decode(&d); err != nil {
log.Warn("Error while decoding raw job data json")
return err, 0, 1000 return err, 0, 1000
} }
@ -23,6 +25,7 @@ func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
}) })
if err, ok := data.(error); ok { if err, ok := data.(error); ok {
log.Warn("Error in decoded job data set")
return nil, err return nil, err
} }
@ -32,6 +35,7 @@ func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) { func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
var d schema.JobMeta var d schema.JobMeta
if err := json.NewDecoder(r).Decode(&d); err != nil { if err := json.NewDecoder(r).Decode(&d); err != nil {
log.Warn("Error while decoding raw job meta json")
return &d, err return &d, err
} }
@ -43,6 +47,7 @@ func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
func DecodeCluster(r io.Reader) (*schema.Cluster, error) { func DecodeCluster(r io.Reader) (*schema.Cluster, error) {
var c schema.Cluster var c schema.Cluster
if err := json.NewDecoder(r).Decode(&c); err != nil { if err := json.NewDecoder(r).Decode(&c); err != nil {
log.Warn("Error while decoding raw cluster json")
return &c, err return &c, err
} }
@ -54,6 +59,7 @@ func DecodeCluster(r io.Reader) (*schema.Cluster, error) {
func EncodeJobData(w io.Writer, d *schema.JobData) error { func EncodeJobData(w io.Writer, d *schema.JobData) error {
// Sanitize parameters // Sanitize parameters
if err := json.NewEncoder(w).Encode(d); err != nil { if err := json.NewEncoder(w).Encode(d); err != nil {
log.Warn("Error while encoding new job data json")
return err return err
} }
@ -63,6 +69,7 @@ func EncodeJobData(w io.Writer, d *schema.JobData) error {
func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error { func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error {
// Sanitize parameters // Sanitize parameters
if err := json.NewEncoder(w).Encode(d); err != nil { if err := json.NewEncoder(w).Encode(d); err != nil {
log.Warn("Error while encoding new job meta json")
return err return err
} }

View File

@ -64,7 +64,7 @@ type NLExprIntRange struct {
func (nle NLExprIntRange) consume(input string) (next string, ok bool) { func (nle NLExprIntRange) consume(input string) (next string, ok bool) {
if !nle.zeroPadded || nle.digits < 1 { if !nle.zeroPadded || nle.digits < 1 {
log.Error("node list: only zero-padded ranges are allowed") log.Error("only zero-padded ranges are allowed")
return "", false return "", false
} }
@ -102,7 +102,7 @@ func ParseNodeList(raw string) (NodeList, error) {
i++ i++
} }
if i == len(raw) { if i == len(raw) {
return nil, fmt.Errorf("node list: unclosed '['") return nil, fmt.Errorf("ARCHIVE/NODELIST > unclosed '['")
} }
} else if raw[i] == ',' { } else if raw[i] == ',' {
rawterms = append(rawterms, raw[prevterm:i]) rawterms = append(rawterms, raw[prevterm:i])
@ -135,7 +135,7 @@ func ParseNodeList(raw string) (NodeList, error) {
end := strings.Index(rawterm[i:], "]") end := strings.Index(rawterm[i:], "]")
if end == -1 { if end == -1 {
return nil, fmt.Errorf("node list: unclosed '['") return nil, fmt.Errorf("ARCHIVE/NODELIST > unclosed '['")
} }
parts := strings.Split(rawterm[i+1:i+end], ",") parts := strings.Split(rawterm[i+1:i+end], ",")
@ -144,21 +144,21 @@ func ParseNodeList(raw string) (NodeList, error) {
for _, part := range parts { for _, part := range parts {
minus := strings.Index(part, "-") minus := strings.Index(part, "-")
if minus == -1 { if minus == -1 {
return nil, fmt.Errorf("node list: no '-' found inside '[...]'") return nil, fmt.Errorf("ARCHIVE/NODELIST > no '-' found inside '[...]'")
} }
s1, s2 := part[0:minus], part[minus+1:] s1, s2 := part[0:minus], part[minus+1:]
if len(s1) != len(s2) || len(s1) == 0 { if len(s1) != len(s2) || len(s1) == 0 {
return nil, fmt.Errorf("node list: %#v and %#v are not of equal length or of length zero", s1, s2) return nil, fmt.Errorf("ARCHIVE/NODELIST > %v and %v are not of equal length or of length zero", s1, s2)
} }
x1, err := strconv.ParseInt(s1, 10, 32) x1, err := strconv.ParseInt(s1, 10, 32)
if err != nil { if err != nil {
return nil, fmt.Errorf("node list: %w", err) return nil, fmt.Errorf("ARCHIVE/NODELIST > could not parse int: %w", err)
} }
x2, err := strconv.ParseInt(s2, 10, 32) x2, err := strconv.ParseInt(s2, 10, 32)
if err != nil { if err != nil {
return nil, fmt.Errorf("node list: %w", err) return nil, fmt.Errorf("ARCHIVE/NODELIST > could not parse int: %w", err)
} }
nles = append(nles, NLExprIntRange{ nles = append(nles, NLExprIntRange{
@ -172,7 +172,7 @@ func ParseNodeList(raw string) (NodeList, error) {
exprs = append(exprs, nles) exprs = append(exprs, nles)
i += end i += end
} else { } else {
return nil, fmt.Errorf("node list: invalid character: %#v", rune(c)) return nil, fmt.Errorf("ARCHIVE/NODELIST > invalid character: %#v", rune(c))
} }
} }
nl = append(nl, exprs) nl = append(nl, exprs)
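A hedged sketch of parsing a node-list expression with zero-padded ranges; the hostnames are made up, and the lookup on the resulting NodeList is only indicated in a comment:

// Illustrative only (not part of this change): parse a node list expression.
func nodeListExample() error {
	nl, err := ParseNodeList("n[01-04],gpu[01-02]")
	if err != nil {
		return err
	}
	_ = nl // the resulting NodeList can then be used to match hostnames such as "n03"
	return nil
}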

View File

@ -12,8 +12,8 @@ import (
) )
// Provides a simple way of logging with different levels. // Provides a simple way of logging with different levels.
// Time/Data are not logged on purpose because systemd adds // Time/Date are not logged because systemd adds
// them for us. // them for us (Default, can be changed by flag '--logdate true').
// //
// Uses these prefixes: https://www.freedesktop.org/software/systemd/man/sd-daemon.html // Uses these prefixes: https://www.freedesktop.org/software/systemd/man/sd-daemon.html
@ -22,109 +22,162 @@ var (
InfoWriter io.Writer = os.Stderr InfoWriter io.Writer = os.Stderr
WarnWriter io.Writer = os.Stderr WarnWriter io.Writer = os.Stderr
ErrWriter io.Writer = os.Stderr ErrWriter io.Writer = os.Stderr
CritWriter io.Writer = os.Stderr
) )
var ( var (
DebugPrefix string = "<7>[DEBUG] " DebugPrefix string = "<7>[DEBUG] "
InfoPrefix string = "<6>[INFO] " InfoPrefix string = "<6>[INFO] "
WarnPrefix string = "<4>[WARNING] " WarnPrefix string = "<4>[WARNING] "
ErrPrefix string = "<3>[ERROR] " ErrPrefix string = "<3>[ERROR] "
CritPrefix string = "<2>[CRITICAL] "
) )
var ( var (
DebugLog *log.Logger = log.New(DebugWriter, DebugPrefix, 0) DebugLog *log.Logger
InfoLog *log.Logger = log.New(InfoWriter, InfoPrefix, 0) InfoLog *log.Logger
WarnLog *log.Logger = log.New(WarnWriter, WarnPrefix, 0) WarnLog *log.Logger
ErrLog *log.Logger = log.New(ErrWriter, ErrPrefix, 0) ErrLog *log.Logger
CritLog *log.Logger
) )
func init() { /* CONFIG */
if lvl, ok := os.LookupEnv("LOGLEVEL"); ok {
switch lvl { func Init(lvl string, logdate bool) {
case "err", "fatal": switch lvl {
WarnWriter = io.Discard case "crit":
fallthrough ErrWriter = io.Discard
case "warn": fallthrough
InfoWriter = io.Discard case "err", "fatal":
fallthrough WarnWriter = io.Discard
case "info": fallthrough
DebugWriter = io.Discard case "warn":
case "debug": InfoWriter = io.Discard
// Nothing to do... fallthrough
default: case "info":
Warnf("environment variable LOGLEVEL has invalid value %#v", lvl) DebugWriter = io.Discard
} case "debug":
// Nothing to do...
break
default:
fmt.Printf("pkg/log: Flag 'loglevel' has invalid value %#v\npkg/log: Will use default loglevel 'debug'\n", lvl)
//SetLogLevel("debug")
}
if !logdate {
DebugLog = log.New(DebugWriter, DebugPrefix, 0)
InfoLog = log.New(InfoWriter, InfoPrefix, log.Lshortfile)
WarnLog = log.New(WarnWriter, WarnPrefix, log.Lshortfile)
ErrLog = log.New(ErrWriter, ErrPrefix, log.Llongfile)
CritLog = log.New(CritWriter, CritPrefix, log.Llongfile)
} else {
DebugLog = log.New(DebugWriter, DebugPrefix, log.LstdFlags)
InfoLog = log.New(InfoWriter, InfoPrefix, log.LstdFlags|log.Lshortfile)
WarnLog = log.New(WarnWriter, WarnPrefix, log.LstdFlags|log.Lshortfile)
ErrLog = log.New(ErrWriter, ErrPrefix, log.LstdFlags|log.Llongfile)
CritLog = log.New(CritWriter, CritPrefix, log.LstdFlags|log.Llongfile)
} }
} }
func Debug(v ...interface{}) { /* PRINT */
if DebugWriter != io.Discard {
DebugLog.Print(v...) // Private helper
} func printStr(v ...interface{}) string {
} return fmt.Sprint(v...)
func Info(v ...interface{}) {
if InfoWriter != io.Discard {
InfoLog.Print(v...)
}
} }
// Uses Info() -> If errorpath required at some point:
// Will need own writer with 'Output(2, out)' to correctly render path
func Print(v ...interface{}) { func Print(v ...interface{}) {
Info(v...) Info(v...)
} }
func Debug(v ...interface{}) {
DebugLog.Output(2, printStr(v...))
}
func Info(v ...interface{}) {
InfoLog.Output(2, printStr(v...))
}
func Warn(v ...interface{}) { func Warn(v ...interface{}) {
if WarnWriter != io.Discard { WarnLog.Output(2, printStr(v...))
WarnLog.Print(v...)
}
} }
func Error(v ...interface{}) { func Error(v ...interface{}) {
if ErrWriter != io.Discard { ErrLog.Output(2, printStr(v...))
ErrLog.Print(v...)
}
} }
// Writes panic stacktrace, but keeps application alive
func Panic(v ...interface{}) {
ErrLog.Output(2, printStr(v...))
panic("Panic triggered ...")
}
func Crit(v ...interface{}) {
CritLog.Output(2, printStr(v...))
}
// Writes critical log, stops application
func Fatal(v ...interface{}) { func Fatal(v ...interface{}) {
Error(v...) CritLog.Output(2, printStr(v...))
os.Exit(1) os.Exit(1)
} }
func Debugf(format string, v ...interface{}) { /* PRINT FORMAT*/
if DebugWriter != io.Discard {
DebugLog.Printf(format, v...) // Private helper
} func printfStr(format string, v ...interface{}) string {
} return fmt.Sprintf(format, v...)
func Infof(format string, v ...interface{}) {
if InfoWriter != io.Discard {
InfoLog.Printf(format, v...)
}
} }
// Uses Infof() -> If errorpath required at some point:
// Will need own writer with 'Output(2, out)' to correctly render path
func Printf(format string, v ...interface{}) { func Printf(format string, v ...interface{}) {
Infof(format, v...) Infof(format, v...)
} }
func Finfof(w io.Writer, format string, v ...interface{}) { func Debugf(format string, v ...interface{}) {
if w != io.Discard { DebugLog.Output(2, printfStr(format, v...))
fmt.Fprintf(InfoWriter, InfoPrefix+format+"\n", v...) }
}
func Infof(format string, v ...interface{}) {
InfoLog.Output(2, printfStr(format, v...))
} }
func Warnf(format string, v ...interface{}) { func Warnf(format string, v ...interface{}) {
if WarnWriter != io.Discard { WarnLog.Output(2, printfStr(format, v...))
WarnLog.Printf(format, v...)
}
} }
func Errorf(format string, v ...interface{}) { func Errorf(format string, v ...interface{}) {
if ErrWriter != io.Discard { ErrLog.Output(2, printfStr(format, v...))
ErrLog.Printf(format, v...)
}
} }
// Writes panic stacktrace, but keeps application alive
func Panicf(format string, v ...interface{}) {
ErrLog.Output(2, printfStr(format, v...))
panic("Panic triggered ...")
}
func Critf(format string, v ...interface{}) {
CritLog.Output(2, printfStr(format, v...))
}
// Writes crit log, stops application
func Fatalf(format string, v ...interface{}) { func Fatalf(format string, v ...interface{}) {
Errorf(format, v...) CritLog.Output(2, printfStr(format, v...))
os.Exit(1) os.Exit(1)
} }
/* SPECIAL */
// func Finfof(w io.Writer, format string, v ...interface{}) {
// if w != io.Discard {
// if logDateTime {
// currentTime := time.Now()
// fmt.Fprintf(InfoWriter, currentTime.String()+InfoPrefix+format+"\n", v...)
// } else {
// fmt.Fprintf(InfoWriter, InfoPrefix+format+"\n", v...)
// }
// }
// }
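
A short usage sketch of the reworked logger, assuming it is initialized once from the CLI flags mentioned in the comments ('--loglevel', '--logdate'); Init and the print functions come from the hunks above, everything else is illustrative.

package main

import "github.com/ClusterCockpit/cc-backend/pkg/log"

func main() {
    // Replaces the old LOGLEVEL environment variable: level and date flag
    // are now passed explicitly (e.g. from --loglevel / --logdate).
    log.Init("info", false)

    log.Debugf("not shown at level %q", "info") // DebugWriter is io.Discard here
    log.Infof("listening on %s", ":8080")
    log.Warn("still written to stderr with the <4>[WARNING] prefix")
    // log.Fatal(...) would log via CritLog and then call os.Exit(1).
}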

View File

@ -69,7 +69,7 @@ func (c *Cache) Get(key string, computeValue ComputeValue) interface{} {
if now.After(entry.expiration) { if now.After(entry.expiration) {
if !c.evictEntry(entry) { if !c.evictEntry(entry) {
if entry.expiration.IsZero() { if entry.expiration.IsZero() {
panic("cache entry that shoud have been waited for could not be evicted.") panic("LRUCACHE/CACHE > cache entry that shoud have been waited for could not be evicted.")
} }
c.mutex.Unlock() c.mutex.Unlock()
return entry.value return entry.value
@ -208,7 +208,7 @@ func (c *Cache) Keys(f func(key string, val interface{})) {
size := 0 size := 0
for key, e := range c.entries { for key, e := range c.entries {
if key != e.key { if key != e.key {
panic("key mismatch") panic("LRUCACHE/CACHE > key mismatch")
} }
if now.After(e.expiration) { if now.After(e.expiration) {
@ -219,13 +219,13 @@ func (c *Cache) Keys(f func(key string, val interface{})) {
if e.prev != nil { if e.prev != nil {
if e.prev.next != e { if e.prev.next != e {
panic("list corrupted") panic("LRUCACHE/CACHE > list corrupted")
} }
} }
if e.next != nil { if e.next != nil {
if e.next.prev != e { if e.next.prev != e {
panic("list corrupted") panic("LRUCACHE/CACHE > list corrupted")
} }
} }
@ -234,18 +234,18 @@ func (c *Cache) Keys(f func(key string, val interface{})) {
} }
if size != c.usedmemory { if size != c.usedmemory {
panic("size calculations failed") panic("LRUCACHE/CACHE > size calculations failed")
} }
if c.head != nil { if c.head != nil {
if c.tail == nil || c.head.prev != nil { if c.tail == nil || c.head.prev != nil {
panic("head/tail corrupted") panic("LRUCACHE/CACHE > head/tail corrupted")
} }
} }
if c.tail != nil { if c.tail != nil {
if c.head == nil || c.tail.next != nil { if c.head == nil || c.tail.next != nil {
panic("head/tail corrupted") panic("LRUCACHE/CACHE > head/tail corrupted")
} }
} }
} }
@ -281,7 +281,7 @@ func (c *Cache) unlinkEntry(e *cacheEntry) {
func (c *Cache) evictEntry(e *cacheEntry) bool { func (c *Cache) evictEntry(e *cacheEntry) bool {
if e.waitingForComputation != 0 { if e.waitingForComputation != 0 {
// panic("cannot evict this entry as other goroutines need the value") // panic("LRUCACHE/CACHE > cannot evict this entry as other goroutines need the value")
return false return false
} }
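
A minimal get-or-compute sketch for the cache whose panics are re-prefixed above. Only Get(key, computeValue) is taken from the hunk header; the New constructor, the package path, and the ComputeValue signature are assumptions.

package main

import (
    "fmt"
    "time"

    "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
)

func main() {
    cache := lrucache.New(1024) // assumed constructor: maximum "memory" budget

    value := cache.Get("answer", func() (interface{}, time.Duration, int) {
        // Computed only on a miss; cached for one minute, "costs" 8 bytes.
        return 42, time.Minute, 8
    })
    fmt.Println(value) // 42, either freshly computed or served from the cache
}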

View File

@ -9,6 +9,8 @@ import (
"io" "io"
"math" "math"
"strconv" "strconv"
"github.com/ClusterCockpit/cc-backend/pkg/log"
) )
// A custom float type is used so that (Un)MarshalJSON and // A custom float type is used so that (Un)MarshalJSON and
@ -43,6 +45,7 @@ func (f *Float) UnmarshalJSON(input []byte) error {
val, err := strconv.ParseFloat(s, 64) val, err := strconv.ParseFloat(s, 64)
if err != nil { if err != nil {
log.Warn("Error while parsing custom float")
return err return err
} }
*f = Float(val) *f = Float(val)
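
A small sketch of how the custom Float behaves when decoding JSON, assuming the type lives in pkg/schema and also defines MarshalJSON as the type comment suggests.

package main

import (
    "encoding/json"
    "fmt"

    "github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func main() {
    // Regular numbers go through strconv.ParseFloat as in the hunk above;
    // parse failures are now logged before the error is returned.
    var f schema.Float
    if err := json.Unmarshal([]byte("3.14"), &f); err != nil {
        fmt.Println("unexpected:", err)
        return
    }
    fmt.Println(float64(f)) // 3.14

    out, _ := json.Marshal(f) // assumes MarshalJSON mirrors UnmarshalJSON
    fmt.Println(string(out))
}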

View File

@ -133,12 +133,12 @@ const (
func (e *JobState) UnmarshalGQL(v interface{}) error { func (e *JobState) UnmarshalGQL(v interface{}) error {
str, ok := v.(string) str, ok := v.(string)
if !ok { if !ok {
return fmt.Errorf("enums must be strings") return fmt.Errorf("SCHEMA/JOB > enums must be strings")
} }
*e = JobState(str) *e = JobState(str)
if !e.Valid() { if !e.Valid() {
return errors.New("invalid job state") return errors.New("SCHEMA/JOB > invalid job state")
} }
return nil return nil

View File

@ -92,12 +92,12 @@ func (e *MetricScope) Max(other MetricScope) MetricScope {
func (e *MetricScope) UnmarshalGQL(v interface{}) error { func (e *MetricScope) UnmarshalGQL(v interface{}) error {
str, ok := v.(string) str, ok := v.(string)
if !ok { if !ok {
return fmt.Errorf("enums must be strings") return fmt.Errorf("SCHEMA/METRICS > enums must be strings")
} }
*e = MetricScope(str) *e = MetricScope(str)
if !e.Valid() { if !e.Valid() {
return fmt.Errorf("%s is not a valid MetricScope", str) return fmt.Errorf("SCHEMA/METRICS > %s is not a valid MetricScope", str)
} }
return nil return nil
} }
@ -303,7 +303,7 @@ func (jm *JobMetric) AddPercentiles(ps []int) bool {
for _, p := range ps { for _, p := range ps {
if p < 1 || p > 99 { if p < 1 || p > 99 {
panic("invalid percentile") panic("SCHEMA/METRICS > invalid percentile")
} }
if _, ok := jm.StatisticsSeries.Percentiles[p]; ok { if _, ok := jm.StatisticsSeries.Percentiles[p]; ok {

View File

@ -45,21 +45,22 @@ func Validate(k Kind, r io.Reader) (err error) {
case Config: case Config:
s, err = jsonschema.Compile("embedfs://config.schema.json") s, err = jsonschema.Compile("embedfs://config.schema.json")
default: default:
return fmt.Errorf("unkown schema kind ") return fmt.Errorf("SCHEMA/VALIDATE > unkown schema kind: %#v", k)
} }
if err != nil { if err != nil {
log.Errorf("Error while compiling json schema for kind '%#v'", k)
return err return err
} }
var v interface{} var v interface{}
if err := json.NewDecoder(r).Decode(&v); err != nil { if err := json.NewDecoder(r).Decode(&v); err != nil {
log.Errorf("schema.Validate() - Failed to decode %v", err) log.Warnf("Error while decoding raw json schema: %#v", err)
return err return err
} }
if err = s.Validate(v); err != nil { if err = s.Validate(v); err != nil {
return fmt.Errorf("%#v", err) return fmt.Errorf("SCHEMA/VALIDATE > %#v", err)
} }
return nil return nil
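
A hedged example of the Validate helper changed above; the Kind value Config and the signature come from the hunks, while the package path and the config snippet are assumptions.

package main

import (
    "bytes"
    "fmt"

    "github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func main() {
    raw := []byte(`{ "addr": ":8080" }`) // hypothetical config snippet

    // Validate compiles the embedded JSON schema for the given Kind and
    // returns a "SCHEMA/VALIDATE > ..." error if the document does not match.
    if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
        fmt.Println("invalid config:", err)
    }
}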

View File

@ -192,7 +192,7 @@ func GetUnitUnitFactor(in Unit, out Unit) (func(value interface{}) interface{},
} else if in.getMeasure() == TemperatureF && out.getMeasure() == TemperatureC { } else if in.getMeasure() == TemperatureF && out.getMeasure() == TemperatureC {
return convertTempF2TempC, nil return convertTempF2TempC, nil
} else if in.getMeasure() != out.getMeasure() || in.getUnitDenominator() != out.getUnitDenominator() { } else if in.getMeasure() != out.getMeasure() || in.getUnitDenominator() != out.getUnitDenominator() {
return func(value interface{}) interface{} { return 1.0 }, fmt.Errorf("invalid measures in in and out Unit") return func(value interface{}) interface{} { return 1.0 }, fmt.Errorf("UNITS/UNITS > invalid measures in in and out Unit")
} }
return GetPrefixPrefixFactor(in.getPrefix(), out.getPrefix()), nil return GetPrefixPrefixFactor(in.getPrefix(), out.getPrefix()), nil
} }

View File

@ -11,7 +11,6 @@ else
tar xJf job-archive-dev.tar.xz tar xJf job-archive-dev.tar.xz
rm ./job-archive-dev.tar.xz rm ./job-archive-dev.tar.xz
touch ./job.db
cd ../web/frontend cd ../web/frontend
yarn install yarn install
yarn build yarn build
@ -21,5 +20,6 @@ else
cp ./docs/config.json config.json cp ./docs/config.json config.json
go build ./cmd/cc-backend go build ./cmd/cc-backend
./cc-backend --migrate-db
./cc-backend --server --dev --init-db --add-user demo:admin:AdminDev ./cc-backend --server --dev --init-db --add-user demo:admin:AdminDev
fi fi

View File

@ -20,6 +20,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/gorilla/mux" "github.com/gorilla/mux"
@ -245,6 +246,7 @@ func setup(t *testing.T) *api.RestApi {
] ]
}` }`
log.Init("info", true)
tmpdir := t.TempDir() tmpdir := t.TempDir()
jobarchive := filepath.Join(tmpdir, "job-archive") jobarchive := filepath.Join(tmpdir, "job-archive")
if err := os.Mkdir(jobarchive, 0777); err != nil { if err := os.Mkdir(jobarchive, 0777); err != nil {
@ -267,11 +269,7 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err) t.Fatal(err)
} }
dbfilepath := filepath.Join(tmpdir, "test.db") dbfilepath := filepath.Join(tmpdir, "test.db")
f, err := os.Create(dbfilepath) repository.MigrateDB("sqlite3", dbfilepath)
if err != nil {
t.Fatal(err)
}
f.Close()
cfgFilePath := filepath.Join(tmpdir, "config.json") cfgFilePath := filepath.Join(tmpdir, "config.json")
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil { if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
@ -292,10 +290,6 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err) t.Fatal(err)
} }
if _, err := db.DB.Exec(repository.JobsDBSchema); err != nil {
t.Fatal(err)
}
jobRepo := repository.GetJobRepository() jobRepo := repository.GetJobRepository()
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo} resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}

View File

@ -1,7 +1,7 @@
<script> <script>
import { Icon, Button, InputGroup, Input, Collapse, import { Icon, Button, InputGroup, Input, Collapse,
Navbar, NavbarBrand, Nav, NavItem, NavLink, NavbarToggler, Navbar, NavbarBrand, Nav, NavItem, NavLink, NavbarToggler,
Dropdown, DropdownToggle, DropdownMenu, DropdownItem } from 'sveltestrap' Dropdown, DropdownToggle, DropdownMenu, DropdownItem, InputGroupText } from 'sveltestrap'
export let username // empty string if auth. is disabled, otherwise the username as string export let username // empty string if auth. is disabled, otherwise the username as string
export let authlevel // integer export let authlevel // integer
@ -93,6 +93,7 @@
<InputGroup> <InputGroup>
<Input type="text" placeholder={(authlevel >= 4) ? "Search jobId / username" : "Search jobId"} name="searchId"/> <Input type="text" placeholder={(authlevel >= 4) ? "Search jobId / username" : "Search jobId"} name="searchId"/>
<Button outline type="submit"><Icon name="search"/></Button> <Button outline type="submit"><Icon name="search"/></Button>
<InputGroupText style="cursor:help;" title={isAdmin ? "Example: 'projectId:a100cd', Types are: jobId | jobName | projectId | username" | "name" : "Example: 'jobName:myjob', Types are jobId | jobName"}><Icon name="info-circle"/></InputGroupText>
</InputGroup> </InputGroup>
</form> </form>
{#if username} {#if username}

View File

@ -20,6 +20,7 @@
const stats = operationStore(`query($filter: [JobFilter!]!) { const stats = operationStore(`query($filter: [JobFilter!]!) {
rows: jobsStatistics(filter: $filter, groupBy: ${type}) { rows: jobsStatistics(filter: $filter, groupBy: ${type}) {
id id
name
totalJobs totalJobs
totalWalltime totalWalltime
totalCoreHours totalCoreHours
@ -93,6 +94,15 @@
<Icon name="sort-numeric-down" /> <Icon name="sort-numeric-down" />
</Button> </Button>
</th> </th>
{#if type == 'USER'}
<th scope="col">
Name
<Button color="{sorting.field == 'name' ? 'primary' : 'light'}"
size="sm" on:click={e => changeSorting(e, 'name')}>
<Icon name="sort-numeric-down" />
</Button>
</th>
{/if}
<th scope="col"> <th scope="col">
Total Jobs Total Jobs
<Button color="{sorting.field == 'totalJobs' ? 'primary' : 'light'}" <Button color="{sorting.field == 'totalJobs' ? 'primary' : 'light'}"
@ -137,6 +147,9 @@
{row.id} {row.id}
{/if} {/if}
</td> </td>
{#if type == 'USER'}
<td>{row?.name ? row.name : ''}</td>
{/if}
<td>{row.totalJobs}</td> <td>{row.totalJobs}</td>
<td>{row.totalWalltime}</td> <td>{row.totalWalltime}</td>
<td>{row.totalCoreHours}</td> <td>{row.totalCoreHours}</td>

View File

@ -35,16 +35,17 @@
projectMatch: filterPresets.projectMatch || 'contains', projectMatch: filterPresets.projectMatch || 'contains',
userMatch: filterPresets.userMatch || 'contains', userMatch: filterPresets.userMatch || 'contains',
cluster: filterPresets.cluster || null, cluster: filterPresets.cluster || null,
partition: filterPresets.partition || null, partition: filterPresets.partition || null,
states: filterPresets.states || filterPresets.state ? [filterPresets.state].flat() : allJobStates, states: filterPresets.states || filterPresets.state ? [filterPresets.state].flat() : allJobStates,
startTime: filterPresets.startTime || { from: null, to: null }, startTime: filterPresets.startTime || { from: null, to: null },
tags: filterPresets.tags || [], tags: filterPresets.tags || [],
duration: filterPresets.duration || { from: null, to: null }, duration: filterPresets.duration || { from: null, to: null },
jobId: filterPresets.jobId || '', jobId: filterPresets.jobId || '',
arrayJobId: filterPresets.arrayJobId || null, arrayJobId: filterPresets.arrayJobId || null,
user: filterPresets.user || '', user: filterPresets.user || '',
project: filterPresets.project || '', project: filterPresets.project || '',
jobName: filterPresets.jobName || '',
numNodes: filterPresets.numNodes || { from: null, to: null }, numNodes: filterPresets.numNodes || { from: null, to: null },
numHWThreads: filterPresets.numHWThreads || { from: null, to: null }, numHWThreads: filterPresets.numHWThreads || { from: null, to: null },
@ -94,6 +95,8 @@
items.push({ user: { [filters.userMatch]: filters.user } }) items.push({ user: { [filters.userMatch]: filters.user } })
if (filters.project) if (filters.project)
items.push({ project: { [filters.projectMatch]: filters.project } }) items.push({ project: { [filters.projectMatch]: filters.project } })
if (filters.jobName)
items.push({ jobName: { contains: filters.jobName } })
for (let stat of filters.stats) for (let stat of filters.stats)
items.push({ [stat.field]: { from: stat.from, to: stat.to } }) items.push({ [stat.field]: { from: stat.from, to: stat.to } })
@ -123,12 +126,19 @@
opts.push(`numNodes=${filters.numNodes.from}-${filters.numNodes.to}`) opts.push(`numNodes=${filters.numNodes.from}-${filters.numNodes.to}`)
if (filters.numAccelerators.from && filters.numAccelerators.to) if (filters.numAccelerators.from && filters.numAccelerators.to)
opts.push(`numAccelerators=${filters.numAccelerators.from}-${filters.numAccelerators.to}`) opts.push(`numAccelerators=${filters.numAccelerators.from}-${filters.numAccelerators.to}`)
if (filters.user) if (filters.user.length != 0)
opts.push(`user=${filters.user}`) if (filters.userMatch != 'in') {
opts.push(`user=${filters.user}`)
} else {
for (let singleUser of filters.user)
opts.push(`user=${singleUser}`)
}
if (filters.userMatch != 'contains') if (filters.userMatch != 'contains')
opts.push(`userMatch=${filters.userMatch}`) opts.push(`userMatch=${filters.userMatch}`)
if (filters.project) if (filters.project)
opts.push(`project=${filters.project}`) opts.push(`project=${filters.project}`)
if (filters.jobName)
opts.push(`jobName=${filters.jobName}`)
if (filters.projectMatch != 'contains') if (filters.projectMatch != 'contains')
opts.push(`projectMatch=${filters.projectMatch}`) opts.push(`projectMatch=${filters.projectMatch}`)

View File

@ -52,7 +52,10 @@
{/if} {/if}
{#if job.project && job.project != 'no project'} {#if job.project && job.project != 'no project'}
<br/> <br/>
<Icon name="people-fill"/> {job.project} <Icon name="people-fill"/>
<a class="fst-italic" href="/monitoring/jobs/?project={job.project}&projectMatch=eq" target="_blank">
{scrambleNames ? scramble(job.project) : job.project}
</a>
{/if} {/if}
</p> </p>

View File

@ -33,7 +33,7 @@
query($filter: [JobFilter!]!, $sorting: OrderByInput!, $paging: PageRequest! ){ query($filter: [JobFilter!]!, $sorting: OrderByInput!, $paging: PageRequest! ){
jobs(filter: $filter, order: $sorting, page: $paging) { jobs(filter: $filter, order: $sorting, page: $paging) {
items { items {
id, jobId, user, project, cluster, subCluster, startTime, id, jobId, user, project, jobName, cluster, subCluster, startTime,
duration, numNodes, numHWThreads, numAcc, walltime, duration, numNodes, numHWThreads, numAcc, walltime,
SMT, exclusive, partition, arrayJobId, SMT, exclusive, partition, arrayJobId,
monitoringStatus, state, monitoringStatus, state,

View File

@ -230,7 +230,7 @@ commondir@^1.0.1:
concat-map@0.0.1: concat-map@0.0.1:
version "0.0.1" version "0.0.1"
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s= integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==
deepmerge@^4.2.2: deepmerge@^4.2.2:
version "4.2.2" version "4.2.2"
@ -365,9 +365,9 @@ merge-stream@^2.0.0:
integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w== integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==
minimatch@^3.0.4: minimatch@^3.0.4:
version "3.0.4" version "3.1.2"
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b"
integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA== integrity sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==
dependencies: dependencies:
brace-expansion "^1.1.7" brace-expansion "^1.1.7"

View File

@ -24,6 +24,7 @@ var frontendFiles embed.FS
func ServeFiles() http.Handler { func ServeFiles() http.Handler {
publicFiles, err := fs.Sub(frontendFiles, "frontend/public") publicFiles, err := fs.Sub(frontendFiles, "frontend/public")
if err != nil { if err != nil {
log.Fatalf("WEB/WEB > cannot find frontend public files")
panic(err) panic(err)
} }
return http.FileServer(http.FS(publicFiles)) return http.FileServer(http.FS(publicFiles))
@ -47,6 +48,7 @@ func init() {
templates[strings.TrimPrefix(path, "templates/")] = template.Must(template.Must(base.Clone()).ParseFS(templateFiles, path)) templates[strings.TrimPrefix(path, "templates/")] = template.Must(template.Must(base.Clone()).ParseFS(templateFiles, path))
return nil return nil
}); err != nil { }); err != nil {
log.Fatalf("WEB/WEB > cannot find frontend template files")
panic(err) panic(err)
} }
@ -79,6 +81,7 @@ type Page struct {
func RenderTemplate(rw http.ResponseWriter, r *http.Request, file string, page *Page) { func RenderTemplate(rw http.ResponseWriter, r *http.Request, file string, page *Page) {
t, ok := templates[file] t, ok := templates[file]
if !ok { if !ok {
log.Fatalf("WEB/WEB > template '%s' not found", file)
panic("template not found") panic("template not found")
} }
@ -88,8 +91,8 @@ func RenderTemplate(rw http.ResponseWriter, r *http.Request, file string, page *
} }
} }
log.Infof("%v\n", page.Config) log.Infof("Page config : %v\n", page.Config)
if err := t.Execute(rw, page); err != nil { if err := t.Execute(rw, page); err != nil {
log.Errorf("template error: %s", err.Error()) log.Errorf("Template error: %s", err.Error())
} }
} }
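
For reference, a self-contained sketch of the embed/fs.Sub pattern used by ServeFiles above, written in the new log.Fatalf style; the port, the embedded directory, and wiring it into a plain http server are assumptions.

package main

import (
    "embed"
    "io/fs"
    "net/http"

    "github.com/ClusterCockpit/cc-backend/pkg/log"
)

//go:embed frontend/public
var frontendFiles embed.FS // assumes a frontend/public directory exists at build time

func main() {
    log.Init("info", false)

    // Strip the embed prefix so files are served from "/", and fail loudly
    // if the embedded subtree is missing.
    publicFiles, err := fs.Sub(frontendFiles, "frontend/public")
    if err != nil {
        log.Fatalf("cannot find frontend public files: %v", err)
    }
    http.Handle("/", http.FileServer(http.FS(publicFiles)))
    log.Fatal(http.ListenAndServe(":8080", nil))
}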