diff --git a/CLAUDE.md b/CLAUDE.md index 406f11ba..2148fdca 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -22,7 +22,7 @@ make make frontend # Build only the backend (requires frontend to be built first) -go build -ldflags='-s -X main.date=$(date +"%Y-%m-%d:T%H:%M:%S") -X main.version=1.4.4 -X main.commit=$(git rev-parse --short HEAD)' ./cmd/cc-backend +go build -ldflags='-s -X main.date=$(date +"%Y-%m-%d:T%H:%M:%S") -X main.version=1.5.0 -X main.commit=$(git rev-parse --short HEAD)' ./cmd/cc-backend ``` ### Testing @@ -41,7 +41,7 @@ go test ./internal/repository ### Code Generation ```bash -# Regenerate GraphQL schema and resolvers (after modifying api/*.graphqls) +# Regenerate GraphQL schema and resolvers (after modifying api/schema.graphqls) make graphql # Regenerate Swagger/OpenAPI docs (after modifying API comments) @@ -90,7 +90,7 @@ The backend follows a layered architecture with clear separation of concerns: - Transaction support for batch operations - **internal/api**: REST API endpoints (Swagger/OpenAPI documented) - **internal/graph**: GraphQL API (uses gqlgen) - - Schema in `api/*.graphqls` + - Schema in `api/schema.graphqls` - Generated code in `internal/graph/generated/` - Resolvers in `internal/graph/schema.resolvers.go` - **internal/auth**: Authentication layer @@ -108,7 +108,7 @@ The backend follows a layered architecture with clear separation of concerns: - File system backend (default) - S3 backend - SQLite backend (experimental) -- **pkg/nats**: NATS client and message decoding utilities +- **internal/metricstoreclient**: Client for cc-metric-store queries ### Frontend Structure @@ -138,7 +138,7 @@ recommended). Configuration is per-cluster in `config.json`. 3. The first authenticator that returns true performs the actual `Login` 4. JWT tokens are used for API authentication -**Database Migrations**: SQL migrations in `internal/repository/migrations/` are +**Database Migrations**: SQL migrations in `internal/repository/migrations/sqlite3/` are applied automatically on startup. Version tracking in `version` table. **Scopes**: Metrics can be collected at different scopes: @@ -173,7 +173,7 @@ applied automatically on startup. Version tracking in `version` table. **GraphQL** (gqlgen): -- Schema: `api/*.graphqls` +- Schema: `api/schema.graphqls` - Config: `gqlgen.yml` - Generated code: `internal/graph/generated/` - Custom resolvers: `internal/graph/schema.resolvers.go` @@ -182,7 +182,7 @@ applied automatically on startup. Version tracking in `version` table. **Swagger/OpenAPI**: - Annotations in `internal/api/*.go` -- Generated docs: `api/docs.go`, `api/swagger.yaml` +- Generated docs: `internal/api/docs.go`, `api/swagger.yaml` - Run `make swagger` after API changes ## Testing Conventions @@ -196,7 +196,7 @@ applied automatically on startup. Version tracking in `version` table. ### Adding a new GraphQL field -1. Edit schema in `api/*.graphqls` +1. Edit schema in `api/schema.graphqls` 2. Run `make graphql` 3. Implement resolver in `internal/graph/schema.resolvers.go` @@ -215,7 +215,7 @@ applied automatically on startup. Version tracking in `version` table. ### Modifying database schema -1. Create new migration in `internal/repository/migrations/` +1. Create new migration in `internal/repository/migrations/sqlite3/` 2. Increment `repository.Version` 3. 
Test with fresh database and existing database diff --git a/README.md b/README.md index 475401f4..d01c7140 100644 --- a/README.md +++ b/README.md @@ -173,14 +173,14 @@ ln -s ./var/job-archive Job classification and application detection - [`taskmanager`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/taskmanager) Background task management and scheduled jobs + - [`metricstoreclient`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricstoreclient) + Client for cc-metric-store queries - [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg) contains Go packages that can be used by other projects. - [`archive`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/archive) - Job archive backend implementations (filesystem, S3) + Job archive backend implementations (filesystem, S3, SQLite) - [`metricstore`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/metricstore) In-memory metric data store with checkpointing and metric loading - - [`nats`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/nats) - NATS client and message handling - [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools) Additional command line helper tools. - [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager) diff --git a/cmd/cc-backend/server.go b/cmd/cc-backend/server.go index 250d4860..91f8360f 100644 --- a/cmd/cc-backend/server.go +++ b/cmd/cc-backend/server.go @@ -299,6 +299,10 @@ func (s *Server) init() error { }) } + // Set NotFound on the router so chi uses it for all unmatched routes, + // including those under subrouters like /api, /userapi, /frontend, etc. + s.router.NotFound(notFoundHandler) + if config.Keys.EmbedStaticFiles { if i, err := os.Stat("./var/img"); err == nil { if i.IsDir() { diff --git a/go.mod b/go.mod index 77da0104..6bcc3b08 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ tool ( require ( github.com/99designs/gqlgen v0.17.85 - github.com/ClusterCockpit/cc-lib/v2 v2.2.1 + github.com/ClusterCockpit/cc-lib/v2 v2.2.2 github.com/Masterminds/squirrel v1.5.4 github.com/aws/aws-sdk-go-v2 v1.41.1 github.com/aws/aws-sdk-go-v2/config v1.32.6 diff --git a/go.sum b/go.sum index 40b90751..f4f41dfd 100644 --- a/go.sum +++ b/go.sum @@ -6,6 +6,8 @@ github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+ github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= github.com/ClusterCockpit/cc-lib/v2 v2.2.1 h1:iCVas+Jc61zFH5S2VG3H1sc7tsn+U4lOJwUYjYZEims= github.com/ClusterCockpit/cc-lib/v2 v2.2.1/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw= +github.com/ClusterCockpit/cc-lib/v2 v2.2.2 h1:ye4RY57I19c2cXr3XWZBS/QYYgQVeGFvsiu5HkyKq9E= +github.com/ClusterCockpit/cc-lib/v2 v2.2.2/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw= github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= diff --git a/internal/api/job.go b/internal/api/job.go index d67dbb93..9bd93b1c 100644 --- a/internal/api/job.go +++ b/internal/api/job.go @@ -754,6 +754,7 @@ func (api *RestAPI) stopJobByRequest(rw http.ResponseWriter, r *http.Request) { return } + isCached := false job, err = api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime) if err != nil { // Try cached jobs if not found in 
main repository @@ -764,9 +765,10 @@ func (api *RestAPI) stopJobByRequest(rw http.ResponseWriter, r *http.Request) { return } job = cachedJob + isCached = true } - api.checkAndHandleStopJob(rw, job, req) + api.checkAndHandleStopJob(rw, job, req, isCached) } // deleteJobByID godoc @@ -923,7 +925,7 @@ func (api *RestAPI) deleteJobBefore(rw http.ResponseWriter, r *http.Request) { } } -func (api *RestAPI) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobAPIRequest) { +func (api *RestAPI) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobAPIRequest, isCached bool) { // Sanity checks if job.State != schema.JobStateRunning { handleError(fmt.Errorf("jobId %d (id %d) on %s : job has already been stopped (state is: %s)", job.JobID, *job.ID, job.Cluster, job.State), http.StatusUnprocessableEntity, rw) @@ -948,11 +950,21 @@ func (api *RestAPI) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo api.JobRepository.Mutex.Lock() defer api.JobRepository.Mutex.Unlock() - if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil { - if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil { - handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, *job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw) + // If the job is still in job_cache, transfer it to the job table first + // so that job.ID always points to the job table for downstream code + if isCached { + newID, err := api.JobRepository.TransferCachedJobToMain(*job.ID) + if err != nil { + handleError(fmt.Errorf("jobId %d (id %d) on %s : transferring cached job failed: %w", job.JobID, *job.ID, job.Cluster, err), http.StatusInternalServerError, rw) return } + cclog.Infof("transferred cached job to main table: old id %d -> new id %d (jobId=%d)", *job.ID, newID, job.JobID) + job.ID = &newID + } + + if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil { + handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, *job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw) + return } cclog.Infof("archiving job... 
(dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s", *job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State) diff --git a/internal/api/nats.go b/internal/api/nats.go index c0a8c174..0e929426 100644 --- a/internal/api/nats.go +++ b/internal/api/nats.go @@ -251,6 +251,7 @@ func (api *NatsAPI) handleStopJob(payload string) { return } + isCached := false job, err := api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime) if err != nil { cachedJob, cachedErr := api.JobRepository.FindCached(req.JobID, req.Cluster, req.StartTime) @@ -260,6 +261,7 @@ func (api *NatsAPI) handleStopJob(payload string) { return } job = cachedJob + isCached = true } if job.State != schema.JobStateRunning { @@ -287,16 +289,26 @@ func (api *NatsAPI) handleStopJob(payload string) { api.JobRepository.Mutex.Lock() defer api.JobRepository.Mutex.Unlock() - if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil { - if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil { - cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: marking job as '%s' failed: %v", - job.JobID, job.ID, job.Cluster, job.State, err) + // If the job is still in job_cache, transfer it to the job table first + if isCached { + newID, err := api.JobRepository.TransferCachedJobToMain(*job.ID) + if err != nil { + cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: transferring cached job failed: %v", + job.JobID, *job.ID, job.Cluster, err) return } + cclog.Infof("NATS: transferred cached job to main table: old id %d -> new id %d (jobId=%d)", *job.ID, newID, job.JobID) + job.ID = &newID + } + + if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil { + cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: marking job as '%s' failed: %v", + job.JobID, *job.ID, job.Cluster, job.State, err) + return } cclog.Infof("NATS: archiving job (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s", - job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State) + *job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State) if job.MonitoringStatus == schema.MonitoringStatusDisabled { return diff --git a/internal/archiver/README.md b/internal/archiver/README.md index 48aed797..53d00948 100644 --- a/internal/archiver/README.md +++ b/internal/archiver/README.md @@ -170,7 +170,6 @@ All exported functions are safe for concurrent use: - `Start()` - Safe to call once - `TriggerArchiving()` - Safe from multiple goroutines - `Shutdown()` - Safe to call once -- `WaitForArchiving()` - Deprecated, but safe Internal state is protected by: - Channel synchronization (`archiveChannel`) diff --git a/internal/auth/auth.go b/internal/auth/auth.go index df618a3f..8a2073b5 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -294,6 +294,11 @@ func handleOIDCUser(OIDCUser *schema.User) { handleUserSync(OIDCUser, Keys.OpenIDConfig.SyncUserOnLogin, Keys.OpenIDConfig.UpdateUserOnLogin) } +// handleLdapUser syncs LDAP user with database +func handleLdapUser(ldapUser *schema.User) { + handleUserSync(ldapUser, Keys.LdapConfig.SyncUserOnLogin, Keys.LdapConfig.UpdateUserOnLogin) +} + func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error { session, err := auth.sessionStore.New(r, "session") if err != nil { diff --git a/internal/auth/ldap.go b/internal/auth/ldap.go index 
5e12f07b..a174bb9d 100644 --- a/internal/auth/ldap.go +++ b/internal/auth/ldap.go @@ -6,11 +6,12 @@ package auth import ( - "errors" "fmt" + "net" "net/http" "os" "strings" + "time" "github.com/ClusterCockpit/cc-backend/internal/repository" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" @@ -25,16 +26,19 @@ type LdapConfig struct { UserBind string `json:"user-bind"` UserFilter string `json:"user-filter"` UserAttr string `json:"username-attr"` + UIDAttr string `json:"uid-attr"` SyncInterval string `json:"sync-interval"` // Parsed using time.ParseDuration. SyncDelOldUsers bool `json:"sync-del-old-users"` - // Should an non-existent user be added to the DB if user exists in ldap directory - SyncUserOnLogin bool `json:"sync-user-on-login"` + // Should a non-existent user be added to the DB if user exists in ldap directory + SyncUserOnLogin bool `json:"sync-user-on-login"` + UpdateUserOnLogin bool `json:"update-user-on-login"` } type LdapAuthenticator struct { syncPassword string UserAttr string + UIDAttr string } var _ Authenticator = (*LdapAuthenticator)(nil) @@ -51,6 +55,12 @@ func (la *LdapAuthenticator) Init() error { la.UserAttr = "gecos" } + if Keys.LdapConfig.UIDAttr != "" { + la.UIDAttr = Keys.LdapConfig.UIDAttr + } else { + la.UIDAttr = "uid" + } + return nil } @@ -66,55 +76,44 @@ func (la *LdapAuthenticator) CanLogin( if user.AuthSource == schema.AuthViaLDAP { return user, true } - } else { - if lc.SyncUserOnLogin { - l, err := la.getLdapConnection(true) - if err != nil { - cclog.Error("LDAP connection error") - return nil, false - } - defer l.Close() - - // Search for the given username - searchRequest := ldap.NewSearchRequest( - lc.UserBase, - ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false, - fmt.Sprintf("(&%s(uid=%s))", lc.UserFilter, username), - []string{"dn", "uid", la.UserAttr}, nil) - - sr, err := l.Search(searchRequest) - if err != nil { - cclog.Warn(err) - return nil, false - } - - if len(sr.Entries) != 1 { - cclog.Warn("LDAP: User does not exist or too many entries returned") - return nil, false - } - - entry := sr.Entries[0] - name := entry.GetAttributeValue(la.UserAttr) - var roles []string - roles = append(roles, schema.GetRoleString(schema.RoleUser)) - projects := make([]string, 0) - - user = &schema.User{ - Username: username, - Name: name, - Roles: roles, - Projects: projects, - AuthType: schema.AuthSession, - AuthSource: schema.AuthViaLDAP, - } - - if err := repository.GetUserRepository().AddUser(user); err != nil { - cclog.Errorf("User '%s' LDAP: Insert into DB failed", username) - return nil, false - } - - return user, true + } else if lc.SyncUserOnLogin { + l, err := la.getLdapConnection(true) + if err != nil { + cclog.Error("LDAP connection error") + return nil, false } + defer l.Close() + + // Search for the given username + searchRequest := ldap.NewSearchRequest( + lc.UserBase, + ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false, + fmt.Sprintf("(&%s(%s=%s))", lc.UserFilter, la.UIDAttr, ldap.EscapeFilter(username)), + []string{"dn", la.UIDAttr, la.UserAttr}, nil) + + sr, err := l.Search(searchRequest) + if err != nil { + cclog.Warn(err) + return nil, false + } + + if len(sr.Entries) != 1 { + cclog.Warn("LDAP: User does not exist or too many entries returned") + return nil, false + } + + entry := sr.Entries[0] + user = &schema.User{ + Username: username, + Name: entry.GetAttributeValue(la.UserAttr), + Roles: []string{schema.GetRoleString(schema.RoleUser)}, + Projects: make([]string, 0), + AuthType: schema.AuthSession, + AuthSource: 
schema.AuthViaLDAP, + } + + handleLdapUser(user) + return user, true } return nil, false @@ -132,7 +131,7 @@ func (la *LdapAuthenticator) Login( } defer l.Close() - userDn := strings.ReplaceAll(Keys.LdapConfig.UserBind, "{username}", user.Username) + userDn := strings.ReplaceAll(Keys.LdapConfig.UserBind, "{username}", ldap.EscapeDN(user.Username)) if err := l.Bind(userDn, r.FormValue("password")); err != nil { cclog.Errorf("AUTH/LDAP > Authentication for user %s failed: %v", user.Username, err) @@ -170,7 +169,7 @@ func (la *LdapAuthenticator) Sync() error { lc.UserBase, ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false, lc.UserFilter, - []string{"dn", "uid", la.UserAttr}, nil)) + []string{"dn", la.UIDAttr, la.UserAttr}, nil)) if err != nil { cclog.Warn("LDAP search error") return err @@ -178,9 +177,9 @@ func (la *LdapAuthenticator) Sync() error { newnames := map[string]string{} for _, entry := range ldapResults.Entries { - username := entry.GetAttributeValue("uid") + username := entry.GetAttributeValue(la.UIDAttr) if username == "" { - return errors.New("no attribute 'uid'") + return fmt.Errorf("no attribute '%s'", la.UIDAttr) } _, ok := users[username] @@ -194,20 +193,19 @@ func (la *LdapAuthenticator) Sync() error { for username, where := range users { if where == InDB && lc.SyncDelOldUsers { - ur.DelUser(username) + if err := ur.DelUser(username); err != nil { + cclog.Errorf("User '%s' LDAP: Delete from DB failed: %v", username, err) + return err + } cclog.Debugf("sync: remove %v (does not show up in LDAP anymore)", username) } else if where == InLdap { name := newnames[username] - var roles []string - roles = append(roles, schema.GetRoleString(schema.RoleUser)) - projects := make([]string, 0) - user := &schema.User{ Username: username, Name: name, - Roles: roles, - Projects: projects, + Roles: []string{schema.GetRoleString(schema.RoleUser)}, + Projects: make([]string, 0), AuthSource: schema.AuthViaLDAP, } @@ -224,11 +222,13 @@ func (la *LdapAuthenticator) Sync() error { func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) { lc := Keys.LdapConfig - conn, err := ldap.DialURL(lc.URL) + conn, err := ldap.DialURL(lc.URL, + ldap.DialWithDialer(&net.Dialer{Timeout: 10 * time.Second})) if err != nil { cclog.Warn("LDAP URL dial failed") return nil, err } + conn.SetTimeout(30 * time.Second) if admin { if err := conn.Bind(lc.SearchDN, la.syncPassword); err != nil { diff --git a/internal/auth/oidc.go b/internal/auth/oidc.go index f81b651f..ec6c77a7 100644 --- a/internal/auth/oidc.go +++ b/internal/auth/oidc.go @@ -9,6 +9,7 @@ import ( "context" "crypto/rand" "encoding/base64" + "fmt" "io" "net/http" "os" @@ -50,6 +51,7 @@ func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value strin MaxAge: int(time.Hour.Seconds()), Secure: r.TLS != nil, HttpOnly: true, + SameSite: http.SameSiteLaxMode, } http.SetCookie(w, c) } @@ -77,8 +79,7 @@ func NewOIDC(a *Authentication) *OIDC { ClientID: clientID, ClientSecret: clientSecret, Endpoint: provider.Endpoint(), - RedirectURL: "oidc-callback", - Scopes: []string{oidc.ScopeOpenID, "profile", "email"}, + Scopes: []string{oidc.ScopeOpenID, "profile"}, } oa := &OIDC{provider: provider, client: client, clientID: clientID, authentication: a} @@ -122,54 +123,93 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) { token, err := oa.client.Exchange(ctx, code, oauth2.VerifierOption(codeVerifier)) if err != nil { - http.Error(rw, "Failed to exchange token: "+err.Error(), 
http.StatusInternalServerError) + cclog.Errorf("token exchange failed: %s", err.Error()) + http.Error(rw, "Authentication failed during token exchange", http.StatusInternalServerError) return } // Get user info from OIDC provider with same timeout userInfo, err := oa.provider.UserInfo(ctx, oauth2.StaticTokenSource(token)) if err != nil { - http.Error(rw, "Failed to get userinfo: "+err.Error(), http.StatusInternalServerError) + cclog.Errorf("failed to get userinfo: %s", err.Error()) + http.Error(rw, "Failed to retrieve user information", http.StatusInternalServerError) return } - // // Extract the ID Token from OAuth2 token. - // rawIDToken, ok := token.Extra("id_token").(string) - // if !ok { - // http.Error(rw, "Cannot access idToken", http.StatusInternalServerError) - // } - // - // verifier := oa.provider.Verifier(&oidc.Config{ClientID: oa.clientID}) - // // Parse and verify ID Token payload. - // idToken, err := verifier.Verify(context.Background(), rawIDToken) - // if err != nil { - // http.Error(rw, "Failed to extract idToken: "+err.Error(), http.StatusInternalServerError) - // } + // Verify ID token and nonce to prevent replay attacks + rawIDToken, ok := token.Extra("id_token").(string) + if !ok { + http.Error(rw, "ID token not found in response", http.StatusInternalServerError) + return + } + + nonceCookie, err := r.Cookie("nonce") + if err != nil { + http.Error(rw, "nonce cookie not found", http.StatusBadRequest) + return + } + + verifier := oa.provider.Verifier(&oidc.Config{ClientID: oa.clientID}) + idToken, err := verifier.Verify(ctx, rawIDToken) + if err != nil { + cclog.Errorf("ID token verification failed: %s", err.Error()) + http.Error(rw, "ID token verification failed", http.StatusInternalServerError) + return + } + + if idToken.Nonce != nonceCookie.Value { + http.Error(rw, "Nonce mismatch", http.StatusBadRequest) + return + } projects := make([]string, 0) - // Extract custom claims + // Extract custom claims from userinfo var claims struct { Username string `json:"preferred_username"` Name string `json:"name"` - Profile struct { + // Keycloak realm-level roles + RealmAccess struct { + Roles []string `json:"roles"` + } `json:"realm_access"` + // Keycloak client-level roles + ResourceAccess struct { Client struct { Roles []string `json:"roles"` } `json:"clustercockpit"` } `json:"resource_access"` } if err := userInfo.Claims(&claims); err != nil { - http.Error(rw, "Failed to extract Claims: "+err.Error(), http.StatusInternalServerError) + cclog.Errorf("failed to extract claims: %s", err.Error()) + http.Error(rw, "Failed to extract user claims", http.StatusInternalServerError) + return + } + + if claims.Username == "" { + http.Error(rw, "Username claim missing from OIDC provider", http.StatusBadRequest) + return + } + + // Merge roles from both client-level and realm-level access + oidcRoles := append(claims.ResourceAccess.Client.Roles, claims.RealmAccess.Roles...) 
+ + roleSet := make(map[string]bool) + for _, r := range oidcRoles { + switch r { + case "user": + roleSet[schema.GetRoleString(schema.RoleUser)] = true + case "admin": + roleSet[schema.GetRoleString(schema.RoleAdmin)] = true + case "manager": + roleSet[schema.GetRoleString(schema.RoleManager)] = true + case "support": + roleSet[schema.GetRoleString(schema.RoleSupport)] = true + } } var roles []string - for _, r := range claims.Profile.Client.Roles { - switch r { - case "user": - roles = append(roles, schema.GetRoleString(schema.RoleUser)) - case "admin": - roles = append(roles, schema.GetRoleString(schema.RoleAdmin)) - } + for role := range roleSet { + roles = append(roles, role) } if len(roles) == 0 { @@ -188,8 +228,12 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) { handleOIDCUser(user) } - oa.authentication.SaveSession(rw, r, user) - cclog.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects) + if err := oa.authentication.SaveSession(rw, r, user); err != nil { + cclog.Errorf("session save failed for user %q: %s", user.Username, err.Error()) + http.Error(rw, "Failed to create session", http.StatusInternalServerError) + return + } + cclog.Infof("login successful: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects) userCtx := context.WithValue(r.Context(), repository.ContextUserKey, user) http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(userCtx)) } @@ -206,7 +250,24 @@ func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) { codeVerifier := oauth2.GenerateVerifier() setCallbackCookie(rw, r, "verifier", codeVerifier) + // Generate nonce for ID token replay protection + nonce, err := randString(16) + if err != nil { + http.Error(rw, "Internal error", http.StatusInternalServerError) + return + } + setCallbackCookie(rw, r, "nonce", nonce) + + // Build redirect URL from the incoming request + scheme := "https" + if r.TLS == nil && r.Header.Get("X-Forwarded-Proto") != "https" { + scheme = "http" + } + oa.client.RedirectURL = fmt.Sprintf("%s://%s/oidc-callback", scheme, r.Host) + // Redirect user to consent page to ask for permission - url := oa.client.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.S256ChallengeOption(codeVerifier)) + url := oa.client.AuthCodeURL(state, oauth2.AccessTypeOffline, + oauth2.S256ChallengeOption(codeVerifier), + oidc.Nonce(nonce)) http.Redirect(rw, r, url, http.StatusFound) } diff --git a/internal/auth/schema.go b/internal/auth/schema.go index 496e899b..b6ee0702 100644 --- a/internal/auth/schema.go +++ b/internal/auth/schema.go @@ -92,9 +92,17 @@ var configSchema = ` "description": "Delete obsolete users in database.", "type": "boolean" }, + "uid-attr": { + "description": "LDAP attribute used as login username. 
Default: uid", + "type": "string" + }, "sync-user-on-login": { "description": "Add non-existent user to DB at login attempt if user exists in Ldap directory", "type": "boolean" + }, + "update-user-on-login": { + "description": "Should an existent user attributes in the DB be updated at login attempt with values from LDAP.", + "type": "boolean" } }, "required": ["url", "user-base", "search-dn", "user-bind", "user-filter"] diff --git a/internal/repository/jobCreate.go b/internal/repository/jobCreate.go index 6114ae5e..9f4f366d 100644 --- a/internal/repository/jobCreate.go +++ b/internal/repository/jobCreate.go @@ -71,8 +71,9 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) { jobs = append(jobs, job) } + // Use INSERT OR IGNORE to skip jobs already transferred by the stop path _, err = r.DB.Exec( - "INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache") + "INSERT OR IGNORE INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache") if err != nil { cclog.Warnf("Error while Job sync: %v", err) return nil, err @@ -87,6 +88,29 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) { return jobs, nil } +// TransferCachedJobToMain moves a job from job_cache to the job table. +// Caller must hold r.Mutex. Returns the new job table ID. +func (r *JobRepository) TransferCachedJobToMain(cacheID int64) (int64, error) { + res, err := r.DB.Exec( + "INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache WHERE id = ?", + cacheID) + if err != nil { + return 0, fmt.Errorf("transferring cached job %d to main table failed: %w", cacheID, err) + } + + newID, err := res.LastInsertId() + if err != nil { + return 0, fmt.Errorf("getting new job ID after transfer failed: %w", err) + } + + _, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID) + if err != nil { + return 0, fmt.Errorf("deleting cached job %d after transfer failed: %w", cacheID, err) + } + + return newID, nil +} + // Start inserts a new job in the table, returning the unique job ID. // Statistics are not transfered! 
func (r *JobRepository) Start(job *schema.Job) (id int64, err error) { @@ -129,20 +153,3 @@ func (r *JobRepository) Stop( return err } -func (r *JobRepository) StopCached( - jobID int64, - duration int32, - state schema.JobState, - monitoringStatus int32, -) (err error) { - // Note: StopCached updates job_cache table, not the main job table - // Cache invalidation happens when job is synced to main table - stmt := sq.Update("job_cache"). - Set("job_state", state). - Set("duration", duration). - Set("monitoring_status", monitoringStatus). - Where("job_cache.id = ?", jobID) - - _, err = stmt.RunWith(r.stmtCache).Exec() - return err -} diff --git a/internal/repository/jobCreate_test.go b/internal/repository/jobCreate_test.go index 3a586482..9e72555f 100644 --- a/internal/repository/jobCreate_test.go +++ b/internal/repository/jobCreate_test.go @@ -331,58 +331,60 @@ func TestStop(t *testing.T) { }) } -func TestStopCached(t *testing.T) { +func TestTransferCachedJobToMain(t *testing.T) { r := setup(t) - t.Run("successful stop cached job", func(t *testing.T) { + t.Run("successful transfer from cache to main", func(t *testing.T) { // Insert a job in job_cache job := createTestJob(999009, "testcluster") - id, err := r.Start(job) + cacheID, err := r.Start(job) require.NoError(t, err) - // Stop the cached job - duration := int32(3600) - state := schema.JobStateCompleted - monitoringStatus := int32(schema.MonitoringStatusArchivingSuccessful) + // Transfer the cached job to the main table + r.Mutex.Lock() + newID, err := r.TransferCachedJobToMain(cacheID) + r.Mutex.Unlock() + require.NoError(t, err, "TransferCachedJobToMain should succeed") + assert.NotEqual(t, cacheID, newID, "New ID should differ from cache ID") - err = r.StopCached(id, duration, state, monitoringStatus) - require.NoError(t, err, "StopCached should succeed") - - // Verify job was updated in job_cache table - var retrievedDuration int32 - var retrievedState string - var retrievedMonStatus int32 - err = r.DB.QueryRow(`SELECT duration, job_state, monitoring_status FROM job_cache WHERE id = ?`, id).Scan( - &retrievedDuration, &retrievedState, &retrievedMonStatus) + // Verify job exists in job table + var count int + err = r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE id = ?`, newID).Scan(&count) require.NoError(t, err) - assert.Equal(t, duration, retrievedDuration) - assert.Equal(t, string(state), retrievedState) - assert.Equal(t, monitoringStatus, retrievedMonStatus) + assert.Equal(t, 1, count, "Job should exist in main table") + + // Verify job was removed from job_cache + err = r.DB.QueryRow(`SELECT COUNT(*) FROM job_cache WHERE id = ?`, cacheID).Scan(&count) + require.NoError(t, err) + assert.Equal(t, 0, count, "Job should be removed from cache") // Clean up - _, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id) + _, err = r.DB.Exec("DELETE FROM job WHERE id = ?", newID) require.NoError(t, err) }) - t.Run("stop cached job does not affect job table", func(t *testing.T) { + t.Run("transfer preserves job data", func(t *testing.T) { // Insert a job in job_cache job := createTestJob(999010, "testcluster") - id, err := r.Start(job) + cacheID, err := r.Start(job) require.NoError(t, err) - // Stop the cached job - err = r.StopCached(id, 3600, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful)) + // Transfer the cached job + r.Mutex.Lock() + newID, err := r.TransferCachedJobToMain(cacheID) + r.Mutex.Unlock() require.NoError(t, err) - // Verify job table was not affected - var count int - err = 
r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE job_id = ? AND cluster = ?`, - job.JobID, job.Cluster).Scan(&count) + // Verify the transferred job has the correct data + var jobID int64 + var cluster string + err = r.DB.QueryRow(`SELECT job_id, cluster FROM job WHERE id = ?`, newID).Scan(&jobID, &cluster) require.NoError(t, err) - assert.Equal(t, 0, count, "Job table should not be affected by StopCached") + assert.Equal(t, job.JobID, jobID) + assert.Equal(t, job.Cluster, cluster) // Clean up - _, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id) + _, err = r.DB.Exec("DELETE FROM job WHERE id = ?", newID) require.NoError(t, err) }) } diff --git a/internal/repository/node_test.go b/internal/repository/node_test.go index b863dc69..4286ab34 100644 --- a/internal/repository/node_test.go +++ b/internal/repository/node_test.go @@ -139,6 +139,13 @@ func nodeTestSetup(t *testing.T) { } archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive) + if err := ResetConnection(); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + ResetConnection() + }) + Connect(dbfilepath) if err := archive.Init(json.RawMessage(archiveCfg)); err != nil { diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index 34852830..b9496143 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -6,6 +6,8 @@ package repository import ( "context" + "os" + "path/filepath" "testing" "github.com/ClusterCockpit/cc-backend/internal/graph/model" @@ -148,8 +150,22 @@ func getContext(tb testing.TB) context.Context { func setup(tb testing.TB) *JobRepository { tb.Helper() cclog.Init("warn", true) - dbfile := "testdata/job.db" - err := MigrateDB(dbfile) + + // Copy test DB to a temp file for test isolation + srcData, err := os.ReadFile("testdata/job.db") + noErr(tb, err) + dbfile := filepath.Join(tb.TempDir(), "job.db") + err = os.WriteFile(dbfile, srcData, 0o644) + noErr(tb, err) + + // Reset singletons so Connect uses the new temp DB + err = ResetConnection() + noErr(tb, err) + tb.Cleanup(func() { + ResetConnection() + }) + + err = MigrateDB(dbfile) noErr(tb, err) Connect(dbfile) return GetJobRepository() diff --git a/internal/repository/stats_test.go b/internal/repository/stats_test.go index a8dfc818..a6c2da17 100644 --- a/internal/repository/stats_test.go +++ b/internal/repository/stats_test.go @@ -25,17 +25,11 @@ func TestBuildJobStatsQuery(t *testing.T) { func TestJobStats(t *testing.T) { r := setup(t) - // First, count the actual jobs in the database (excluding test jobs) var expectedCount int - err := r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE cluster != 'testcluster'`).Scan(&expectedCount) + err := r.DB.QueryRow(`SELECT COUNT(*) FROM job`).Scan(&expectedCount) noErr(t, err) - filter := &model.JobFilter{} - // Exclude test jobs created by other tests - testCluster := "testcluster" - filter.Cluster = &model.StringInput{Neq: &testCluster} - - stats, err := r.JobsStats(getContext(t), []*model.JobFilter{filter}) + stats, err := r.JobsStats(getContext(t), []*model.JobFilter{}) noErr(t, err) if stats[0].TotalJobs != expectedCount { diff --git a/internal/repository/userConfig_test.go b/internal/repository/userConfig_test.go index cee59304..17ccbf78 100644 --- a/internal/repository/userConfig_test.go +++ b/internal/repository/userConfig_test.go @@ -31,8 +31,25 @@ func setupUserTest(t *testing.T) *UserCfgRepo { }` cclog.Init("info", true) - dbfilepath := "testdata/job.db" - err := MigrateDB(dbfilepath) + + // Copy test DB to a 
temp file for test isolation + srcData, err := os.ReadFile("testdata/job.db") + if err != nil { + t.Fatal(err) + } + dbfilepath := filepath.Join(t.TempDir(), "job.db") + if err := os.WriteFile(dbfilepath, srcData, 0o644); err != nil { + t.Fatal(err) + } + + if err := ResetConnection(); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + ResetConnection() + }) + + err = MigrateDB(dbfilepath) if err != nil { t.Fatal(err) } diff --git a/pkg/archive/clusterConfig.go b/pkg/archive/clusterConfig.go index 272eeb35..64851365 100644 --- a/pkg/archive/clusterConfig.go +++ b/pkg/archive/clusterConfig.go @@ -25,6 +25,7 @@ func initClusterConfig() error { GlobalUserMetricList = []*schema.GlobalMetricListItem{} NodeLists = map[string]map[string]NodeList{} metricLookup := make(map[string]schema.GlobalMetricListItem) + userMetricLookup := make(map[string]schema.GlobalMetricListItem) for _, c := range ar.GetClusters() { @@ -62,11 +63,12 @@ func initClusterConfig() error { if _, ok := metricLookup[mc.Name]; !ok { metricLookup[mc.Name] = schema.GlobalMetricListItem{ - Name: mc.Name, Scope: mc.Scope, Restrict: mc.Restrict, Unit: mc.Unit, Footprint: mc.Footprint, + Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint, } } availability := schema.ClusterSupport{Cluster: cluster.Name} + userAvailability := schema.ClusterSupport{Cluster: cluster.Name} scLookup := make(map[string]*schema.SubClusterConfig) for _, scc := range mc.SubClusters { @@ -94,6 +96,7 @@ func initClusterConfig() error { newMetric.Footprint = mc.Footprint } + isRestricted := mc.Restrict if cfg, ok := scLookup[sc.Name]; ok { if cfg.Remove { continue @@ -105,9 +108,13 @@ func initClusterConfig() error { newMetric.Footprint = cfg.Footprint newMetric.Energy = cfg.Energy newMetric.LowerIsBetter = cfg.LowerIsBetter + isRestricted = cfg.Restrict } availability.SubClusters = append(availability.SubClusters, sc.Name) + if !isRestricted { + userAvailability.SubClusters = append(userAvailability.SubClusters, sc.Name) + } sc.MetricConfig = append(sc.MetricConfig, newMetric) if newMetric.Footprint != "" { @@ -124,6 +131,17 @@ func initClusterConfig() error { item := metricLookup[mc.Name] item.Availability = append(item.Availability, availability) metricLookup[mc.Name] = item + + if len(userAvailability.SubClusters) > 0 { + userItem, ok := userMetricLookup[mc.Name] + if !ok { + userItem = schema.GlobalMetricListItem{ + Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint, + } + } + userItem.Availability = append(userItem.Availability, userAvailability) + userMetricLookup[mc.Name] = userItem + } } Clusters = append(Clusters, cluster) @@ -144,9 +162,9 @@ func initClusterConfig() error { for _, metric := range metricLookup { GlobalMetricList = append(GlobalMetricList, &metric) - if !metric.Restrict { - GlobalUserMetricList = append(GlobalUserMetricList, &metric) - } + } + for _, metric := range userMetricLookup { + GlobalUserMetricList = append(GlobalUserMetricList, &metric) } return nil diff --git a/tools/convert-pem-pubkey/Readme.md b/tools/convert-pem-pubkey/Readme.md index 1429acc4..22fd0db2 100644 --- a/tools/convert-pem-pubkey/Readme.md +++ b/tools/convert-pem-pubkey/Readme.md @@ -16,7 +16,7 @@ CROSS_LOGIN_JWT_PUBLIC_KEY="+51iXX8BdLFocrppRxIw52xCOf8xFSH/eNilN5IHVGc=" Instructions -- `cd tools/convert-pem-pubkey-for-cc/` +- `cd tools/convert-pem-pubkey/` - Insert your public ed25519 PEM key into `dummy.pub` - `go run . 
dummy.pub` - Copy the result into ClusterCockpit's `.env` diff --git a/web/frontend/README.md b/web/frontend/README.md index d61d302e..4dff4405 100644 --- a/web/frontend/README.md +++ b/web/frontend/README.md @@ -1,11 +1,11 @@ # cc-frontend -[![Build](https://github.com/ClusterCockpit/cc-svelte-datatable/actions/workflows/build.yml/badge.svg)](https://github.com/ClusterCockpit/cc-svelte-datatable/actions/workflows/build.yml) +[![Build](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml/badge.svg)](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml) -A frontend for [ClusterCockpit](https://github.com/ClusterCockpit/ClusterCockpit) and [cc-backend](https://github.com/ClusterCockpit/cc-backend). Backend specific configuration can de done using the constants defined in the `intro` section in `./rollup.config.js`. +A frontend for [ClusterCockpit](https://github.com/ClusterCockpit/ClusterCockpit) and [cc-backend](https://github.com/ClusterCockpit/cc-backend). Backend specific configuration can be done using the constants defined in the `intro` section in `./rollup.config.mjs`. Builds on: -* [Svelte](https://svelte.dev/) +* [Svelte 5](https://svelte.dev/) * [SvelteStrap](https://sveltestrap.js.org/) * [Bootstrap 5](https://getbootstrap.com/) * [urql](https://github.com/FormidableLabs/urql) diff --git a/web/frontend/src/Job.root.svelte b/web/frontend/src/Job.root.svelte index 50de27b5..99dfa7ac 100644 --- a/web/frontend/src/Job.root.svelte +++ b/web/frontend/src/Job.root.svelte @@ -333,7 +333,18 @@ {:else if thisJob && $jobMetrics?.data?.scopedJobStats} {#snippet gridContent(item)} - {#if item.data} + {#if item?.disabled} + + + Disabled Metric + + +

+            No dataset(s) returned for {item.metric}
+            Metric has been disabled for subcluster {thisJob.subCluster}.
+            To remove this card, open metric selection, de-select the metric, and press "Close and Apply".
+ {:else if item?.data} x.scope)} isShared={thisJob.shared != "none"} /> - {:else if item.disabled == true} - - - Disabled Metric - - -

-            Metric {item.metric} is disabled for cluster {thisJob.cluster}:{thisJob.subCluster}.
-            To remove this card, open metric selection and press "Close and Apply".
{:else} diff --git a/web/frontend/src/Jobs.root.svelte b/web/frontend/src/Jobs.root.svelte index 52efca6b..0d543fc8 100644 --- a/web/frontend/src/Jobs.root.svelte +++ b/web/frontend/src/Jobs.root.svelte @@ -142,6 +142,8 @@ { diff --git a/web/frontend/src/Node.root.svelte b/web/frontend/src/Node.root.svelte index 6962aff8..d3364b49 100644 --- a/web/frontend/src/Node.root.svelte +++ b/web/frontend/src/Node.root.svelte @@ -119,7 +119,7 @@ const filter = $derived([ { cluster: { eq: cluster } }, - { node: { contains: hostname } }, + { node: { eq: hostname } }, { state: ["running"] }, ]); @@ -253,12 +253,15 @@ forNode /> {:else if item.disabled === true && item.metric} - Metric disabled for subcluster {item.name}:{$nodeMetricsData.data.nodeMetrics[0] - .subCluster} + + + Disabled Metric + + +

+            No dataset(s) returned for {item.name}
+            Metric has been disabled for subcluster {$nodeMetricsData.data.nodeMetrics[0].subCluster}.
{:else} diff --git a/web/frontend/src/User.root.svelte b/web/frontend/src/User.root.svelte index 76c9c97a..4ee3f892 100644 --- a/web/frontend/src/User.root.svelte +++ b/web/frontend/src/User.root.svelte @@ -219,9 +219,10 @@ { jobFilters = [...detail.filters, { user: { eq: user.username } }]; selectedCluster = jobFilters[0]?.cluster diff --git a/web/frontend/src/generic/Filters.svelte b/web/frontend/src/generic/Filters.svelte index 74f55ca7..5a8bcf23 100644 --- a/web/frontend/src/generic/Filters.svelte +++ b/web/frontend/src/generic/Filters.svelte @@ -6,6 +6,8 @@ - `filterPresets Object?`: Optional predefined filter values [Default: {}] - `disableClusterSelection Bool?`: Is the selection disabled [Default: false] - `startTimeQuickSelect Bool?`: Render startTime quick selections [Default: false] + - `shortJobQuickSelect Bool?`: Render short job quick selections [Default: false] + - `shortJobCutoff Int?`: Time in seconds for jobs to be considered short [Default: null] - `matchedJobs Number?`: Number of jobs matching the filter [Default: -2] - `showFilter Func`: If the filter component should be rendered in addition to total count info [Default: true] - `applyFilters Func`: The callback function to apply current filter selection @@ -25,6 +27,7 @@ ButtonGroup, ButtonDropdown, Icon, + Tooltip } from "@sveltestrap/sveltestrap"; import Info from "./filters/InfoBox.svelte"; import Cluster from "./filters/Cluster.svelte"; @@ -36,6 +39,7 @@ import Resources from "./filters/Resources.svelte"; import Energy from "./filters/Energy.svelte"; import Statistics from "./filters/Stats.svelte"; + import { formatDurationTime } from "./units.js"; /* Svelte 5 Props */ let { @@ -43,6 +47,8 @@ filterPresets = {}, disableClusterSelection = false, startTimeQuickSelect = false, + shortJobQuickSelect = false, + shortJobCutoff = 0, matchedJobs = -2, showFilter = true, applyFilters @@ -335,6 +341,44 @@ (isStatsOpen = true)}> (isStatsOpen = true)} /> Statistics + {#if shortJobQuickSelect && shortJobCutoff > 0} + + + Short Jobs Selection + + + Job duration less than {formatDurationTime(shortJobCutoff)} + + + { + filters.duration = { + moreThan: null, + lessThan: shortJobCutoff, + from: null, + to: null + } + updateFilters(); + }} + > + + Only Short Jobs + + { + filters.duration = { + moreThan: shortJobCutoff, + lessThan: null, + from: null, + to: null + } + updateFilters(); + }} + > + + Exclude Short Jobs + + {/if} {#if startTimeQuickSelect} Start Time Quick Selection diff --git a/web/frontend/src/generic/JobCompare.svelte b/web/frontend/src/generic/JobCompare.svelte index d5283a9a..dfe548b0 100644 --- a/web/frontend/src/generic/JobCompare.svelte +++ b/web/frontend/src/generic/JobCompare.svelte @@ -112,11 +112,7 @@ // (Re-)query and optionally set new filters; Query will be started reactively. export function queryJobs(filters) { if (filters != null) { - let minRunningFor = ccconfig.jobList_hideShortRunningJobs; - if (minRunningFor && minRunningFor > 0) { - filters.push({ minRunningFor }); - } - filter = filters; + filter = [...filters]; } } diff --git a/web/frontend/src/generic/JobList.svelte b/web/frontend/src/generic/JobList.svelte index 9394ed5f..278f189e 100644 --- a/web/frontend/src/generic/JobList.svelte +++ b/web/frontend/src/generic/JobList.svelte @@ -180,10 +180,6 @@ // (Re-)query and optionally set new filters; Query will be started reactively. 
export function queryJobs(filters) { if (filters != null) { - let minRunningFor = ccconfig.jobList_hideShortRunningJobs; - if (minRunningFor && minRunningFor > 0) { - filters.push({ minRunningFor }); - } filter = [...filters]; } }; diff --git a/web/frontend/src/generic/filters/StartTime.svelte b/web/frontend/src/generic/filters/StartTime.svelte index 5d9340e3..2eceaf6e 100644 --- a/web/frontend/src/generic/filters/StartTime.svelte +++ b/web/frontend/src/generic/filters/StartTime.svelte @@ -14,10 +14,10 @@ diff --git a/web/frontend/src/generic/joblist/JobListRow.svelte b/web/frontend/src/generic/joblist/JobListRow.svelte index 353e0827..5d129ad0 100644 --- a/web/frontend/src/generic/joblist/JobListRow.svelte +++ b/web/frontend/src/generic/joblist/JobListRow.svelte @@ -99,7 +99,7 @@ }) ); - const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope($metricsQuery.data.jobMetrics) : []); + const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope(metrics, $metricsQuery.data.jobMetrics) : []); /* Effects */ $effect(() => { @@ -140,6 +140,26 @@ }); } + function sortAndSelectScope(metricList = [], jobMetrics = []) { + const pendingData = []; + metricList.forEach((metricName) => { + const pendingMetric = { + name: metricName, + disabled: checkMetricDisabled( + globalMetrics, + metricName, + job.cluster, + job.subCluster, + ), + data: null + }; + const scopesData = jobMetrics.filter((jobMetric) => jobMetric.name == metricName) + if (scopesData.length > 0) pendingMetric.data = selectScope(scopesData) + pendingData.push(pendingMetric) + }); + return pendingData; + }; + const selectScope = (jobMetrics) => jobMetrics.reduce( (a, b) => @@ -152,30 +172,6 @@ : a, jobMetrics[0], ); - - const sortAndSelectScope = (jobMetrics) => - metrics - .map((name) => jobMetrics.filter((jobMetric) => jobMetric.name == name)) - .map((jobMetrics) => ({ - disabled: false, - data: jobMetrics.length > 0 ? selectScope(jobMetrics) : null, - })) - .map((jobMetric) => { - if (jobMetric.data) { - return { - name: jobMetric.data.name, - disabled: checkMetricDisabled( - globalMetrics, - jobMetric.data.name, - job.cluster, - job.subCluster, - ), - data: jobMetric.data, - }; - } else { - return jobMetric; - } - }); @@ -211,39 +207,36 @@ {/if} {#each refinedData as metric, i (metric?.name || i)} - {#key metric} - {#if metric?.data} - {#if metric?.disabled} - - Metric {metric.data.name}: Disabled for subcluster {job.subCluster} - - {:else} - handleZoom(detail, metric.data.name)} - height={plotHeight} - timestep={metric.data.metric.timestep} - scope={metric.data.scope} - series={metric.data.metric.series} - statisticsSeries={metric.data.metric.statisticsSeries} - metric={metric.data.name} - cluster={clusterInfos.find((c) => c.name == job.cluster)} - subCluster={job.subCluster} - isShared={job.shared != "none"} - numhwthreads={job.numHWThreads} - numaccs={job.numAcc} - zoomState={zoomStates[metric.data.name] || null} - thresholdState={thresholdStates[metric.data.name] || null} - /> - {/if} - {:else} - -

-            No dataset(s) returned for {metrics[i]}
-            Metric or host was not found in metric store for cluster {job.cluster}:
-            Identical messages in {metrics[i]} column: Metric not found.
-            Identical messages in job {job.jobId} row: Host not found.
- {/if} - {/key} + {#if metric?.disabled} + +

+            No dataset(s) returned for {metrics[i]}
+            Metric has been disabled for subcluster {job.subCluster}.
+ {:else if metric?.data} + handleZoom(detail, metric.data.name)} + height={plotHeight} + timestep={metric.data.metric.timestep} + scope={metric.data.scope} + series={metric.data.metric.series} + statisticsSeries={metric.data.metric.statisticsSeries} + metric={metric.data.name} + cluster={clusterInfos.find((c) => c.name == job.cluster)} + subCluster={job.subCluster} + isShared={job.shared != "none"} + numhwthreads={job.numHWThreads} + numaccs={job.numAcc} + zoomState={zoomStates[metric.data.name] || null} + thresholdState={thresholdStates[metric.data.name] || null} + /> + {:else} + +

+            No dataset(s) returned for {metrics[i]}
+            Metric or host was not found in metric store for cluster {job.cluster}:
+            Identical messages in {metrics[i]} column: Metric not found.
+            Identical messages in job {job.jobId} row: Host not found.
+ {/if} {:else} diff --git a/web/frontend/src/generic/select/MetricSelection.svelte b/web/frontend/src/generic/select/MetricSelection.svelte index dcefa56d..8234b32c 100644 --- a/web/frontend/src/generic/select/MetricSelection.svelte +++ b/web/frontend/src/generic/select/MetricSelection.svelte @@ -88,16 +88,19 @@ function printAvailability(metric, cluster) { const avail = globalMetrics.find((gm) => gm.name === metric)?.availability - if (!cluster) { - return avail.map((av) => av.cluster).join(', ') - } else { - const subAvail = avail.find((av) => av.cluster === cluster)?.subClusters - if (subAvail) { - return subAvail.join(', ') + if (avail) { + if (!cluster) { + return avail.map((av) => av.cluster).join(', ') } else { - return `Not available for ${cluster}` + const subAvail = avail.find((av) => av.cluster === cluster)?.subClusters + if (subAvail) { + return subAvail.join(', ') + } else { + return `Not available for ${cluster}` + } } } + return "" } function columnsDragOver(event) { diff --git a/web/frontend/src/systems/NodeOverview.svelte b/web/frontend/src/systems/NodeOverview.svelte index fd463600..a34fdf0f 100644 --- a/web/frontend/src/systems/NodeOverview.svelte +++ b/web/frontend/src/systems/NodeOverview.svelte @@ -110,7 +110,7 @@ }; }); }; - + let pendingMapped = []; if (rawData.length > 0) { pendingMapped = rawData.map((h) => ({ @@ -120,12 +120,11 @@ data: h.metrics.filter( (m) => m?.name == selectedMetric && m.scope == "node", ), - // TODO: Move To New Func Variant With Disabled Check on WHole Cluster Level: This never Triggers! disabled: checkMetricDisabled(globalMetrics, selectedMetric, cluster, h.subCluster), })) .sort((a, b) => a.host.localeCompare(b.host)) } - + return pendingMapped; } @@ -162,35 +161,32 @@ - {#if item?.data} - {#if item.disabled === true} - - Metric disabled for subcluster {selectedMetric}:{item.subCluster} - {:else if item.disabled === false} - - - {#key item.data[0].metric.series[0].data.length} - - {/key} - {:else} - - Global Metric List Not Initialized - Can not determine {selectedMetric} availability: Please Reload Page - - {/if} + {#if item?.disabled} + + + Disabled Metric + + +

+            No dataset(s) returned for {selectedMetric}
+            Metric has been disabled for subcluster {item.subCluster}.
+ {:else if item?.data} + + + {#key item.data[0].metric.series[0].data.length} + + {/key} {:else} + Missing Metric @@ -205,10 +201,22 @@ {/each} {/key} +{:else if hostnameFilter || hoststateFilter != 'all'} + + + + Empty Filter Return + + +

+            No datasets returned for {selectedMetric}.
+            Hostname filter and/or host state filter returned no matches.
{:else} - - - + + + Missing Metric diff --git a/web/frontend/src/systems/nodelist/NodeListRow.svelte b/web/frontend/src/systems/nodelist/NodeListRow.svelte index 2abe0b41..e091769b 100644 --- a/web/frontend/src/systems/nodelist/NodeListRow.svelte +++ b/web/frontend/src/systems/nodelist/NodeListRow.svelte @@ -72,10 +72,30 @@ ); const extendedLegendData = $derived($nodeJobsData?.data ? buildExtendedLegend() : null); - const refinedData = $derived(nodeData?.metrics ? sortAndSelectScope(nodeData.metrics) : []); + const refinedData = $derived(nodeData?.metrics ? sortAndSelectScope(selectedMetrics, nodeData.metrics) : []); const dataHealth = $derived(refinedData.filter((rd) => rd.disabled === false).map((enabled) => (enabled?.data?.metric?.series?.length > 0))); /* Functions */ + function sortAndSelectScope(metricList = [], nodeMetrics = []) { + const pendingData = []; + metricList.forEach((metricName) => { + const pendingMetric = { + name: metricName, + disabled: checkMetricDisabled( + globalMetrics, + metricName, + cluster, + nodeData.subCluster, + ), + data: null + }; + const scopesData = nodeMetrics.filter((nodeMetric) => nodeMetric.name == metricName) + if (scopesData.length > 0) pendingMetric.data = selectScope(scopesData) + pendingData.push(pendingMetric) + }); + return pendingData; + }; + const selectScope = (nodeMetrics) => nodeMetrics.reduce( (a, b) => @@ -83,29 +103,6 @@ nodeMetrics[0], ); - const sortAndSelectScope = (allNodeMetrics) => - selectedMetrics - .map((selectedName) => allNodeMetrics.filter((nodeMetric) => nodeMetric.name == selectedName)) - .map((matchedNodeMetrics) => ({ - disabled: false, - data: matchedNodeMetrics.length > 0 ? selectScope(matchedNodeMetrics) : null, - })) - .map((scopedNodeMetric) => { - if (scopedNodeMetric?.data) { - return { - disabled: checkMetricDisabled( - globalMetrics, - scopedNodeMetric.data.name, - cluster, - nodeData.subCluster, - ), - data: scopedNodeMetric.data, - }; - } else { - return scopedNodeMetric; - } - }); - function buildExtendedLegend() { let pendingExtendedLegendData = null // Build Extended for allocated nodes [Commented: Only Build extended Legend For Shared Nodes] @@ -171,68 +168,59 @@ {/if} {#each refinedData as metricData, i (metricData?.data?.name || i)} - {#key metricData} - - {#if metricData?.disabled} - Metric {selectedMetrics[i]} disabled for subcluster {nodeData.subCluster} - {:else if !metricData?.data} - -

-            No dataset(s) returned for {selectedMetrics[i]}
-            Metric was not found in metric store for cluster {cluster}.
- {:else if !metricData?.data?.name} - Metric without name for subcluster {`Metric Index ${i}`}:{nodeData.subCluster} - {:else if !!metricData.data?.metric.statisticsSeries} - - -
- {#key extendedLegendData} - - {/key} - {:else} - - {/if} - - {/key} + + {#if metricData?.disabled} + +

+            No dataset(s) returned for {selectedMetrics[i]}
+            Metric has been disabled for subcluster {nodeData.subCluster}.
+ {:else if !metricData?.data} + +

+            No dataset(s) returned for {selectedMetrics[i]}
+            Metric was not found in metric store for cluster {cluster}.
+ {:else if !!metricData.data?.metric.statisticsSeries} + + +
+ {#key extendedLegendData} + + {/key} + {:else} + + {/if} + {/each}
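For reference, a minimal sketch of a `config.json` LDAP block using the new `uid-attr` and `update-user-on-login` options introduced in this patch. The property names follow the JSON schema in `internal/auth/schema.go`; the enclosing `"ldap"` key, the hostname, and the DN values are placeholder assumptions, not taken from this patch:

```json
{
  "ldap": {
    "url": "ldaps://ldap.example.org",
    "user-base": "ou=people,dc=example,dc=org",
    "search-dn": "cn=admin,dc=example,dc=org",
    "user-bind": "uid={username},ou=people,dc=example,dc=org",
    "user-filter": "(objectClass=posixAccount)",
    "username-attr": "gecos",
    "uid-attr": "uid",
    "sync-user-on-login": true,
    "update-user-on-login": true
  }
}
```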