Merge pull request #486 from ClusterCockpit/dev

Dev
Jan Eitzinger authored on 2026-02-10 09:25:54 +01:00, committed by GitHub
33 changed files with 601 additions and 402 deletions

View File

@@ -22,7 +22,7 @@ make
make frontend
# Build only the backend (requires frontend to be built first)
go build -ldflags='-s -X main.date=$(date +"%Y-%m-%d:T%H:%M:%S") -X main.version=1.4.4 -X main.commit=$(git rev-parse --short HEAD)' ./cmd/cc-backend
go build -ldflags='-s -X main.date=$(date +"%Y-%m-%d:T%H:%M:%S") -X main.version=1.5.0 -X main.commit=$(git rev-parse --short HEAD)' ./cmd/cc-backend
```
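The `-X` flags above inject the build metadata into package-level variables of `cmd/cc-backend` at link time. A minimal sketch of that mechanism (the default values and the print helper are illustrative, not copied from the repo):

```go
package main

import "fmt"

// Populated at link time via
//   -ldflags "-X main.version=... -X main.commit=... -X main.date=...".
// The defaults below apply when building without those flags (e.g. plain `go run`).
var (
	version = "dev"
	commit  = "none"
	date    = "unknown"
)

func main() {
	fmt.Printf("cc-backend %s (commit %s, built %s)\n", version, commit, date)
}
```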
### Testing
@@ -41,7 +41,7 @@ go test ./internal/repository
### Code Generation
```bash
# Regenerate GraphQL schema and resolvers (after modifying api/*.graphqls)
# Regenerate GraphQL schema and resolvers (after modifying api/schema.graphqls)
make graphql
# Regenerate Swagger/OpenAPI docs (after modifying API comments)
@@ -90,7 +90,7 @@ The backend follows a layered architecture with clear separation of concerns:
- Transaction support for batch operations
- **internal/api**: REST API endpoints (Swagger/OpenAPI documented)
- **internal/graph**: GraphQL API (uses gqlgen)
- Schema in `api/*.graphqls`
- Schema in `api/schema.graphqls`
- Generated code in `internal/graph/generated/`
- Resolvers in `internal/graph/schema.resolvers.go`
- **internal/auth**: Authentication layer
@@ -108,7 +108,7 @@ The backend follows a layered architecture with clear separation of concerns:
- File system backend (default)
- S3 backend
- SQLite backend (experimental)
- **pkg/nats**: NATS client and message decoding utilities
- **internal/metricstoreclient**: Client for cc-metric-store queries
### Frontend Structure
@@ -138,7 +138,7 @@ recommended). Configuration is per-cluster in `config.json`.
3. The first authenticator that returns true performs the actual `Login`
4. JWT tokens are used for API authentication
**Database Migrations**: SQL migrations in `internal/repository/migrations/` are
**Database Migrations**: SQL migrations in `internal/repository/migrations/sqlite3/` are
applied automatically on startup. Version tracking in `version` table.
**Scopes**: Metrics can be collected at different scopes:
@@ -173,7 +173,7 @@ applied automatically on startup. Version tracking in `version` table.
**GraphQL** (gqlgen):
- Schema: `api/*.graphqls`
- Schema: `api/schema.graphqls`
- Config: `gqlgen.yml`
- Generated code: `internal/graph/generated/`
- Custom resolvers: `internal/graph/schema.resolvers.go`
@@ -182,7 +182,7 @@ applied automatically on startup. Version tracking in `version` table.
**Swagger/OpenAPI**:
- Annotations in `internal/api/*.go`
- Generated docs: `api/docs.go`, `api/swagger.yaml`
- Generated docs: `internal/api/docs.go`, `api/swagger.yaml`
- Run `make swagger` after API changes
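Since `make swagger` regenerates the docs from these annotations, here is a hedged sketch of what a swaggo-style annotation block looks like; the endpoint, handler, and response type are invented for illustration and are not part of `internal/api`:

```go
package api

import (
	"encoding/json"
	"net/http"
)

// healthResponse is an illustrative response payload.
type healthResponse struct {
	Status string `json:"status"`
}

// healthcheck godoc
// @Summary      Report service health
// @Description  Illustrative liveness probe, only here to show the annotation format.
// @Produce      json
// @Success      200  {object}  healthResponse
// @Router       /healthcheck [get]
func healthcheck(rw http.ResponseWriter, r *http.Request) {
	rw.Header().Set("Content-Type", "application/json")
	json.NewEncoder(rw).Encode(healthResponse{Status: "ok"})
}
```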
## Testing Conventions
@@ -196,7 +196,7 @@ applied automatically on startup. Version tracking in `version` table.
### Adding a new GraphQL field
1. Edit schema in `api/*.graphqls`
1. Edit schema in `api/schema.graphqls`
2. Run `make graphql`
3. Implement resolver in `internal/graph/schema.resolvers.go`
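A hedged sketch of step 3, assuming gqlgen generated a matching method stub after `make graphql`; the field name, receiver type, and data source below are hypothetical and not taken from the actual schema:

```go
package graph // real resolvers live in internal/graph/schema.resolvers.go

import (
	"context"
	"fmt"
)

// queryResolver stands in for the gqlgen-generated resolver receiver.
type queryResolver struct {
	partitionsByCluster map[string][]string // placeholder for the real repository dependency
}

// ClusterPartitions implements a hypothetical new query field
// `clusterPartitions(cluster: String!): [String!]!` added to api/schema.graphqls.
func (r *queryResolver) ClusterPartitions(ctx context.Context, cluster string) ([]string, error) {
	parts, ok := r.partitionsByCluster[cluster]
	if !ok {
		return nil, fmt.Errorf("unknown cluster %q", cluster)
	}
	return parts, nil
}
```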
@@ -215,7 +215,7 @@ applied automatically on startup. Version tracking in `version` table.
### Modifying database schema
1. Create new migration in `internal/repository/migrations/`
1. Create new migration in `internal/repository/migrations/sqlite3/`
2. Increment `repository.Version`
3. Test with fresh database and existing database
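For orientation, a generic sketch of applying embedded SQL migrations with golang-migrate; treating this as the library behind `MigrateDB` is an assumption, and the paths and version constant are illustrative only:

```go
package repository

import (
	"embed"
	"errors"
	"fmt"

	"github.com/golang-migrate/migrate/v4"
	_ "github.com/golang-migrate/migrate/v4/database/sqlite3"
	"github.com/golang-migrate/migrate/v4/source/iofs"
)

// Version is the schema version the bundled migrations produce; bump it
// together with every new migration file (illustrative value).
const Version uint = 10

//go:embed migrations/sqlite3/*.sql
var migrationFiles embed.FS

// migrateSQLite applies all pending migrations to the SQLite database at dbPath.
func migrateSQLite(dbPath string) error {
	src, err := iofs.New(migrationFiles, "migrations/sqlite3")
	if err != nil {
		return fmt.Errorf("loading embedded migrations: %w", err)
	}
	m, err := migrate.NewWithSourceInstance("iofs", src, "sqlite3://"+dbPath)
	if err != nil {
		return fmt.Errorf("initializing migration driver: %w", err)
	}
	if err := m.Up(); err != nil && !errors.Is(err, migrate.ErrNoChange) {
		return fmt.Errorf("applying migrations: %w", err)
	}
	return nil
}
```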

View File

@@ -173,14 +173,14 @@ ln -s <your-existing-job-archive> ./var/job-archive
Job classification and application detection
- [`taskmanager`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/taskmanager)
Background task management and scheduled jobs
- [`metricstoreclient`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricstoreclient)
Client for cc-metric-store queries
- [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg)
contains Go packages that can be used by other projects.
- [`archive`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/archive)
Job archive backend implementations (filesystem, S3)
Job archive backend implementations (filesystem, S3, SQLite)
- [`metricstore`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/metricstore)
In-memory metric data store with checkpointing and metric loading
- [`nats`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/nats)
NATS client and message handling
- [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools)
Additional command line helper tools.
- [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)

View File

@@ -299,6 +299,10 @@ func (s *Server) init() error {
})
}
// Set NotFound on the router so chi uses it for all unmatched routes,
// including those under subrouters like /api, /userapi, /frontend, etc.
s.router.NotFound(notFoundHandler)
if config.Keys.EmbedStaticFiles {
if i, err := os.Stat("./var/img"); err == nil {
if i.IsDir() {
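The hunk above wires a shared `notFoundHandler` into the chi router. A standalone sketch of the same chi API, assuming chi v5; the route and handler body are invented for illustration:

```go
package main

import (
	"log"
	"net/http"

	"github.com/go-chi/chi/v5"
)

func main() {
	r := chi.NewRouter()
	r.Get("/", func(w http.ResponseWriter, req *http.Request) {
		w.Write([]byte("ok"))
	})

	// Any request that no route matches falls through to this handler
	// instead of chi's default plain-text 404.
	r.NotFound(func(w http.ResponseWriter, req *http.Request) {
		http.Error(w, "custom 404: route not found", http.StatusNotFound)
	})

	log.Fatal(http.ListenAndServe(":8080", r))
}
```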

go.mod (2 changed lines)
View File

@@ -9,7 +9,7 @@ tool (
require (
github.com/99designs/gqlgen v0.17.85
github.com/ClusterCockpit/cc-lib/v2 v2.2.1
github.com/ClusterCockpit/cc-lib/v2 v2.2.2
github.com/Masterminds/squirrel v1.5.4
github.com/aws/aws-sdk-go-v2 v1.41.1
github.com/aws/aws-sdk-go-v2/config v1.32.6

go.sum (2 changed lines)
View File

@@ -6,6 +6,8 @@ github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/ClusterCockpit/cc-lib/v2 v2.2.1 h1:iCVas+Jc61zFH5S2VG3H1sc7tsn+U4lOJwUYjYZEims=
github.com/ClusterCockpit/cc-lib/v2 v2.2.1/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw=
github.com/ClusterCockpit/cc-lib/v2 v2.2.2 h1:ye4RY57I19c2cXr3XWZBS/QYYgQVeGFvsiu5HkyKq9E=
github.com/ClusterCockpit/cc-lib/v2 v2.2.2/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw=
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=

View File

@@ -754,6 +754,7 @@ func (api *RestAPI) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
return
}
isCached := false
job, err = api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
if err != nil {
// Try cached jobs if not found in main repository
@@ -764,9 +765,10 @@ func (api *RestAPI) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
return
}
job = cachedJob
isCached = true
}
api.checkAndHandleStopJob(rw, job, req)
api.checkAndHandleStopJob(rw, job, req, isCached)
}
// deleteJobByID godoc
@@ -923,7 +925,7 @@ func (api *RestAPI) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
}
}
func (api *RestAPI) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobAPIRequest) {
func (api *RestAPI) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobAPIRequest, isCached bool) {
// Sanity checks
if job.State != schema.JobStateRunning {
handleError(fmt.Errorf("jobId %d (id %d) on %s : job has already been stopped (state is: %s)", job.JobID, *job.ID, job.Cluster, job.State), http.StatusUnprocessableEntity, rw)
@@ -948,11 +950,21 @@ func (api *RestAPI) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
api.JobRepository.Mutex.Lock()
defer api.JobRepository.Mutex.Unlock()
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, *job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
// If the job is still in job_cache, transfer it to the job table first
// so that job.ID always points to the job table for downstream code
if isCached {
newID, err := api.JobRepository.TransferCachedJobToMain(*job.ID)
if err != nil {
handleError(fmt.Errorf("jobId %d (id %d) on %s : transferring cached job failed: %w", job.JobID, *job.ID, job.Cluster, err), http.StatusInternalServerError, rw)
return
}
cclog.Infof("transferred cached job to main table: old id %d -> new id %d (jobId=%d)", *job.ID, newID, job.JobID)
job.ID = &newID
}
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, *job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
return
}
cclog.Infof("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s", *job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)

View File

@@ -251,6 +251,7 @@ func (api *NatsAPI) handleStopJob(payload string) {
return
}
isCached := false
job, err := api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
if err != nil {
cachedJob, cachedErr := api.JobRepository.FindCached(req.JobID, req.Cluster, req.StartTime)
@@ -260,6 +261,7 @@ func (api *NatsAPI) handleStopJob(payload string) {
return
}
job = cachedJob
isCached = true
}
if job.State != schema.JobStateRunning {
@@ -287,16 +289,26 @@ func (api *NatsAPI) handleStopJob(payload string) {
api.JobRepository.Mutex.Lock()
defer api.JobRepository.Mutex.Unlock()
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: marking job as '%s' failed: %v",
job.JobID, job.ID, job.Cluster, job.State, err)
// If the job is still in job_cache, transfer it to the job table first
if isCached {
newID, err := api.JobRepository.TransferCachedJobToMain(*job.ID)
if err != nil {
cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: transferring cached job failed: %v",
job.JobID, *job.ID, job.Cluster, err)
return
}
cclog.Infof("NATS: transferred cached job to main table: old id %d -> new id %d (jobId=%d)", *job.ID, newID, job.JobID)
job.ID = &newID
}
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: marking job as '%s' failed: %v",
job.JobID, *job.ID, job.Cluster, job.State, err)
return
}
cclog.Infof("NATS: archiving job (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s",
job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
*job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
if job.MonitoringStatus == schema.MonitoringStatusDisabled {
return

View File

@@ -170,7 +170,6 @@ All exported functions are safe for concurrent use:
- `Start()` - Safe to call once
- `TriggerArchiving()` - Safe from multiple goroutines
- `Shutdown()` - Safe to call once
- `WaitForArchiving()` - Deprecated, but safe
Internal state is protected by:
- Channel synchronization (`archiveChannel`)

View File

@@ -294,6 +294,11 @@ func handleOIDCUser(OIDCUser *schema.User) {
handleUserSync(OIDCUser, Keys.OpenIDConfig.SyncUserOnLogin, Keys.OpenIDConfig.UpdateUserOnLogin)
}
// handleLdapUser syncs LDAP user with database
func handleLdapUser(ldapUser *schema.User) {
handleUserSync(ldapUser, Keys.LdapConfig.SyncUserOnLogin, Keys.LdapConfig.UpdateUserOnLogin)
}
func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error {
session, err := auth.sessionStore.New(r, "session")
if err != nil {

View File

@@ -6,11 +6,12 @@
package auth
import (
"errors"
"fmt"
"net"
"net/http"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/repository"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
@@ -25,16 +26,19 @@ type LdapConfig struct {
UserBind string `json:"user-bind"`
UserFilter string `json:"user-filter"`
UserAttr string `json:"username-attr"`
UIDAttr string `json:"uid-attr"`
SyncInterval string `json:"sync-interval"` // Parsed using time.ParseDuration.
SyncDelOldUsers bool `json:"sync-del-old-users"`
// Should an non-existent user be added to the DB if user exists in ldap directory
// Should a non-existent user be added to the DB if user exists in ldap directory
SyncUserOnLogin bool `json:"sync-user-on-login"`
UpdateUserOnLogin bool `json:"update-user-on-login"`
}
type LdapAuthenticator struct {
syncPassword string
UserAttr string
UIDAttr string
}
var _ Authenticator = (*LdapAuthenticator)(nil)
@@ -51,6 +55,12 @@ func (la *LdapAuthenticator) Init() error {
la.UserAttr = "gecos"
}
if Keys.LdapConfig.UIDAttr != "" {
la.UIDAttr = Keys.LdapConfig.UIDAttr
} else {
la.UIDAttr = "uid"
}
return nil
}
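A hedged sketch of an LDAP config block exercising the new `uid-attr` key, decoded into a struct mirroring the fields above; the `url`, `user-base`, and `search-dn` field names plus all values are assumptions for illustration only:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// LdapConfig is a subset mirror of the struct shown earlier in this diff (illustrative).
type LdapConfig struct {
	URL             string `json:"url"`
	UserBase        string `json:"user-base"`
	SearchDN        string `json:"search-dn"`
	UserBind        string `json:"user-bind"`
	UserFilter      string `json:"user-filter"`
	UIDAttr         string `json:"uid-attr"`
	SyncUserOnLogin bool   `json:"sync-user-on-login"`
}

func main() {
	raw := []byte(`{
		"url": "ldaps://ldap.example.org",
		"user-base": "ou=people,dc=example,dc=org",
		"search-dn": "cn=admin,dc=example,dc=org",
		"user-bind": "uid={username},ou=people,dc=example,dc=org",
		"user-filter": "(objectClass=posixAccount)",
		"uid-attr": "sAMAccountName",
		"sync-user-on-login": true
	}`)

	var cfg LdapConfig
	if err := json.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Printf("login attribute: %s\n", cfg.UIDAttr)
}
```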
@@ -66,8 +76,7 @@ func (la *LdapAuthenticator) CanLogin(
if user.AuthSource == schema.AuthViaLDAP {
return user, true
}
} else {
if lc.SyncUserOnLogin {
} else if lc.SyncUserOnLogin {
l, err := la.getLdapConnection(true)
if err != nil {
cclog.Error("LDAP connection error")
@@ -79,8 +88,8 @@ func (la *LdapAuthenticator) CanLogin(
searchRequest := ldap.NewSearchRequest(
lc.UserBase,
ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
fmt.Sprintf("(&%s(uid=%s))", lc.UserFilter, username),
[]string{"dn", "uid", la.UserAttr}, nil)
fmt.Sprintf("(&%s(%s=%s))", lc.UserFilter, la.UIDAttr, ldap.EscapeFilter(username)),
[]string{"dn", la.UIDAttr, la.UserAttr}, nil)
sr, err := l.Search(searchRequest)
if err != nil {
@@ -94,28 +103,18 @@ func (la *LdapAuthenticator) CanLogin(
}
entry := sr.Entries[0]
name := entry.GetAttributeValue(la.UserAttr)
var roles []string
roles = append(roles, schema.GetRoleString(schema.RoleUser))
projects := make([]string, 0)
user = &schema.User{
Username: username,
Name: name,
Roles: roles,
Projects: projects,
Name: entry.GetAttributeValue(la.UserAttr),
Roles: []string{schema.GetRoleString(schema.RoleUser)},
Projects: make([]string, 0),
AuthType: schema.AuthSession,
AuthSource: schema.AuthViaLDAP,
}
if err := repository.GetUserRepository().AddUser(user); err != nil {
cclog.Errorf("User '%s' LDAP: Insert into DB failed", username)
return nil, false
}
handleLdapUser(user)
return user, true
}
}
return nil, false
}
@@ -132,7 +131,7 @@ func (la *LdapAuthenticator) Login(
}
defer l.Close()
userDn := strings.ReplaceAll(Keys.LdapConfig.UserBind, "{username}", user.Username)
userDn := strings.ReplaceAll(Keys.LdapConfig.UserBind, "{username}", ldap.EscapeDN(user.Username))
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
cclog.Errorf("AUTH/LDAP > Authentication for user %s failed: %v",
user.Username, err)
@@ -170,7 +169,7 @@ func (la *LdapAuthenticator) Sync() error {
lc.UserBase,
ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
lc.UserFilter,
[]string{"dn", "uid", la.UserAttr}, nil))
[]string{"dn", la.UIDAttr, la.UserAttr}, nil))
if err != nil {
cclog.Warn("LDAP search error")
return err
@@ -178,9 +177,9 @@ func (la *LdapAuthenticator) Sync() error {
newnames := map[string]string{}
for _, entry := range ldapResults.Entries {
username := entry.GetAttributeValue("uid")
username := entry.GetAttributeValue(la.UIDAttr)
if username == "" {
return errors.New("no attribute 'uid'")
return fmt.Errorf("no attribute '%s'", la.UIDAttr)
}
_, ok := users[username]
@@ -194,20 +193,19 @@ func (la *LdapAuthenticator) Sync() error {
for username, where := range users {
if where == InDB && lc.SyncDelOldUsers {
ur.DelUser(username)
if err := ur.DelUser(username); err != nil {
cclog.Errorf("User '%s' LDAP: Delete from DB failed: %v", username, err)
return err
}
cclog.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
} else if where == InLdap {
name := newnames[username]
var roles []string
roles = append(roles, schema.GetRoleString(schema.RoleUser))
projects := make([]string, 0)
user := &schema.User{
Username: username,
Name: name,
Roles: roles,
Projects: projects,
Roles: []string{schema.GetRoleString(schema.RoleUser)},
Projects: make([]string, 0),
AuthSource: schema.AuthViaLDAP,
}
@@ -224,11 +222,13 @@ func (la *LdapAuthenticator) Sync() error {
func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
lc := Keys.LdapConfig
conn, err := ldap.DialURL(lc.URL)
conn, err := ldap.DialURL(lc.URL,
ldap.DialWithDialer(&net.Dialer{Timeout: 10 * time.Second}))
if err != nil {
cclog.Warn("LDAP URL dial failed")
return nil, err
}
conn.SetTimeout(30 * time.Second)
if admin {
if err := conn.Bind(lc.SearchDN, la.syncPassword); err != nil {
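The search filter built in `CanLogin` now runs the username through `ldap.EscapeFilter` before interpolation. A minimal sketch of what that prevents, assuming go-ldap v3; the hostile username is invented for illustration:

```go
package main

import (
	"fmt"

	"github.com/go-ldap/ldap/v3"
)

func main() {
	userFilter := "(objectClass=posixAccount)"
	uidAttr := "uid"
	username := "alice)(uid=*" // attacker-controlled input

	// Without escaping, the injected ')(uid=*' widens the filter to match every user.
	unsafe := fmt.Sprintf("(&%s(%s=%s))", userFilter, uidAttr, username)

	// EscapeFilter hex-escapes the LDAP filter metacharacters (RFC 4515),
	// so the resulting filter only matches the literal string.
	safe := fmt.Sprintf("(&%s(%s=%s))", userFilter, uidAttr, ldap.EscapeFilter(username))

	fmt.Println(unsafe)
	fmt.Println(safe)
}
```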

View File

@@ -9,6 +9,7 @@ import (
"context"
"crypto/rand"
"encoding/base64"
"fmt"
"io"
"net/http"
"os"
@@ -50,6 +51,7 @@ func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value strin
MaxAge: int(time.Hour.Seconds()),
Secure: r.TLS != nil,
HttpOnly: true,
SameSite: http.SameSiteLaxMode,
}
http.SetCookie(w, c)
}
@@ -77,8 +79,7 @@ func NewOIDC(a *Authentication) *OIDC {
ClientID: clientID,
ClientSecret: clientSecret,
Endpoint: provider.Endpoint(),
RedirectURL: "oidc-callback",
Scopes: []string{oidc.ScopeOpenID, "profile", "email"},
Scopes: []string{oidc.ScopeOpenID, "profile"},
}
oa := &OIDC{provider: provider, client: client, clientID: clientID, authentication: a}
@@ -122,54 +123,93 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
token, err := oa.client.Exchange(ctx, code, oauth2.VerifierOption(codeVerifier))
if err != nil {
http.Error(rw, "Failed to exchange token: "+err.Error(), http.StatusInternalServerError)
cclog.Errorf("token exchange failed: %s", err.Error())
http.Error(rw, "Authentication failed during token exchange", http.StatusInternalServerError)
return
}
// Get user info from OIDC provider with same timeout
userInfo, err := oa.provider.UserInfo(ctx, oauth2.StaticTokenSource(token))
if err != nil {
http.Error(rw, "Failed to get userinfo: "+err.Error(), http.StatusInternalServerError)
cclog.Errorf("failed to get userinfo: %s", err.Error())
http.Error(rw, "Failed to retrieve user information", http.StatusInternalServerError)
return
}
// // Extract the ID Token from OAuth2 token.
// rawIDToken, ok := token.Extra("id_token").(string)
// if !ok {
// http.Error(rw, "Cannot access idToken", http.StatusInternalServerError)
// }
//
// verifier := oa.provider.Verifier(&oidc.Config{ClientID: oa.clientID})
// // Parse and verify ID Token payload.
// idToken, err := verifier.Verify(context.Background(), rawIDToken)
// if err != nil {
// http.Error(rw, "Failed to extract idToken: "+err.Error(), http.StatusInternalServerError)
// }
// Verify ID token and nonce to prevent replay attacks
rawIDToken, ok := token.Extra("id_token").(string)
if !ok {
http.Error(rw, "ID token not found in response", http.StatusInternalServerError)
return
}
nonceCookie, err := r.Cookie("nonce")
if err != nil {
http.Error(rw, "nonce cookie not found", http.StatusBadRequest)
return
}
verifier := oa.provider.Verifier(&oidc.Config{ClientID: oa.clientID})
idToken, err := verifier.Verify(ctx, rawIDToken)
if err != nil {
cclog.Errorf("ID token verification failed: %s", err.Error())
http.Error(rw, "ID token verification failed", http.StatusInternalServerError)
return
}
if idToken.Nonce != nonceCookie.Value {
http.Error(rw, "Nonce mismatch", http.StatusBadRequest)
return
}
projects := make([]string, 0)
// Extract custom claims
// Extract custom claims from userinfo
var claims struct {
Username string `json:"preferred_username"`
Name string `json:"name"`
Profile struct {
// Keycloak realm-level roles
RealmAccess struct {
Roles []string `json:"roles"`
} `json:"realm_access"`
// Keycloak client-level roles
ResourceAccess struct {
Client struct {
Roles []string `json:"roles"`
} `json:"clustercockpit"`
} `json:"resource_access"`
}
if err := userInfo.Claims(&claims); err != nil {
http.Error(rw, "Failed to extract Claims: "+err.Error(), http.StatusInternalServerError)
cclog.Errorf("failed to extract claims: %s", err.Error())
http.Error(rw, "Failed to extract user claims", http.StatusInternalServerError)
return
}
if claims.Username == "" {
http.Error(rw, "Username claim missing from OIDC provider", http.StatusBadRequest)
return
}
// Merge roles from both client-level and realm-level access
oidcRoles := append(claims.ResourceAccess.Client.Roles, claims.RealmAccess.Roles...)
roleSet := make(map[string]bool)
for _, r := range oidcRoles {
switch r {
case "user":
roleSet[schema.GetRoleString(schema.RoleUser)] = true
case "admin":
roleSet[schema.GetRoleString(schema.RoleAdmin)] = true
case "manager":
roleSet[schema.GetRoleString(schema.RoleManager)] = true
case "support":
roleSet[schema.GetRoleString(schema.RoleSupport)] = true
}
}
var roles []string
for _, r := range claims.Profile.Client.Roles {
switch r {
case "user":
roles = append(roles, schema.GetRoleString(schema.RoleUser))
case "admin":
roles = append(roles, schema.GetRoleString(schema.RoleAdmin))
}
for role := range roleSet {
roles = append(roles, role)
}
if len(roles) == 0 {
@@ -188,8 +228,12 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
handleOIDCUser(user)
}
oa.authentication.SaveSession(rw, r, user)
cclog.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
if err := oa.authentication.SaveSession(rw, r, user); err != nil {
cclog.Errorf("session save failed for user %q: %s", user.Username, err.Error())
http.Error(rw, "Failed to create session", http.StatusInternalServerError)
return
}
cclog.Infof("login successful: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
userCtx := context.WithValue(r.Context(), repository.ContextUserKey, user)
http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(userCtx))
}
@@ -206,7 +250,24 @@ func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) {
codeVerifier := oauth2.GenerateVerifier()
setCallbackCookie(rw, r, "verifier", codeVerifier)
// Generate nonce for ID token replay protection
nonce, err := randString(16)
if err != nil {
http.Error(rw, "Internal error", http.StatusInternalServerError)
return
}
setCallbackCookie(rw, r, "nonce", nonce)
// Build redirect URL from the incoming request
scheme := "https"
if r.TLS == nil && r.Header.Get("X-Forwarded-Proto") != "https" {
scheme = "http"
}
oa.client.RedirectURL = fmt.Sprintf("%s://%s/oidc-callback", scheme, r.Host)
// Redirect user to consent page to ask for permission
url := oa.client.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.S256ChallengeOption(codeVerifier))
url := oa.client.AuthCodeURL(state, oauth2.AccessTypeOffline,
oauth2.S256ChallengeOption(codeVerifier),
oidc.Nonce(nonce))
http.Redirect(rw, r, url, http.StatusFound)
}

View File

@@ -92,9 +92,17 @@ var configSchema = `
"description": "Delete obsolete users in database.",
"type": "boolean"
},
"uid-attr": {
"description": "LDAP attribute used as login username. Default: uid",
"type": "string"
},
"sync-user-on-login": {
"description": "Add non-existent user to DB at login attempt if user exists in Ldap directory",
"type": "boolean"
},
"update-user-on-login": {
"description": "Should an existing user's attributes in the DB be updated at login with values from LDAP.",
"type": "boolean"
}
},
"required": ["url", "user-base", "search-dn", "user-bind", "user-filter"]

View File

@@ -71,8 +71,9 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
jobs = append(jobs, job)
}
// Use INSERT OR IGNORE to skip jobs already transferred by the stop path
_, err = r.DB.Exec(
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
"INSERT OR IGNORE INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
if err != nil {
cclog.Warnf("Error while Job sync: %v", err)
return nil, err
@@ -87,6 +88,29 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
return jobs, nil
}
// TransferCachedJobToMain moves a job from job_cache to the job table.
// Caller must hold r.Mutex. Returns the new job table ID.
func (r *JobRepository) TransferCachedJobToMain(cacheID int64) (int64, error) {
res, err := r.DB.Exec(
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache WHERE id = ?",
cacheID)
if err != nil {
return 0, fmt.Errorf("transferring cached job %d to main table failed: %w", cacheID, err)
}
newID, err := res.LastInsertId()
if err != nil {
return 0, fmt.Errorf("getting new job ID after transfer failed: %w", err)
}
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
if err != nil {
return 0, fmt.Errorf("deleting cached job %d after transfer failed: %w", cacheID, err)
}
return newID, nil
}
// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transferred!
func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
@@ -129,20 +153,3 @@ func (r *JobRepository) Stop(
return err
}
func (r *JobRepository) StopCached(
jobID int64,
duration int32,
state schema.JobState,
monitoringStatus int32,
) (err error) {
// Note: StopCached updates job_cache table, not the main job table
// Cache invalidation happens when job is synced to main table
stmt := sq.Update("job_cache").
Set("job_state", state).
Set("duration", duration).
Set("monitoring_status", monitoringStatus).
Where("job_cache.id = ?", jobID)
_, err = stmt.RunWith(r.stmtCache).Exec()
return err
}

View File

@@ -331,58 +331,60 @@ func TestStop(t *testing.T) {
})
}
func TestStopCached(t *testing.T) {
func TestTransferCachedJobToMain(t *testing.T) {
r := setup(t)
t.Run("successful stop cached job", func(t *testing.T) {
t.Run("successful transfer from cache to main", func(t *testing.T) {
// Insert a job in job_cache
job := createTestJob(999009, "testcluster")
id, err := r.Start(job)
cacheID, err := r.Start(job)
require.NoError(t, err)
// Stop the cached job
duration := int32(3600)
state := schema.JobStateCompleted
monitoringStatus := int32(schema.MonitoringStatusArchivingSuccessful)
// Transfer the cached job to the main table
r.Mutex.Lock()
newID, err := r.TransferCachedJobToMain(cacheID)
r.Mutex.Unlock()
require.NoError(t, err, "TransferCachedJobToMain should succeed")
assert.NotEqual(t, cacheID, newID, "New ID should differ from cache ID")
err = r.StopCached(id, duration, state, monitoringStatus)
require.NoError(t, err, "StopCached should succeed")
// Verify job was updated in job_cache table
var retrievedDuration int32
var retrievedState string
var retrievedMonStatus int32
err = r.DB.QueryRow(`SELECT duration, job_state, monitoring_status FROM job_cache WHERE id = ?`, id).Scan(
&retrievedDuration, &retrievedState, &retrievedMonStatus)
// Verify job exists in job table
var count int
err = r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE id = ?`, newID).Scan(&count)
require.NoError(t, err)
assert.Equal(t, duration, retrievedDuration)
assert.Equal(t, string(state), retrievedState)
assert.Equal(t, monitoringStatus, retrievedMonStatus)
assert.Equal(t, 1, count, "Job should exist in main table")
// Verify job was removed from job_cache
err = r.DB.QueryRow(`SELECT COUNT(*) FROM job_cache WHERE id = ?`, cacheID).Scan(&count)
require.NoError(t, err)
assert.Equal(t, 0, count, "Job should be removed from cache")
// Clean up
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", newID)
require.NoError(t, err)
})
t.Run("stop cached job does not affect job table", func(t *testing.T) {
t.Run("transfer preserves job data", func(t *testing.T) {
// Insert a job in job_cache
job := createTestJob(999010, "testcluster")
id, err := r.Start(job)
cacheID, err := r.Start(job)
require.NoError(t, err)
// Stop the cached job
err = r.StopCached(id, 3600, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful))
// Transfer the cached job
r.Mutex.Lock()
newID, err := r.TransferCachedJobToMain(cacheID)
r.Mutex.Unlock()
require.NoError(t, err)
// Verify job table was not affected
var count int
err = r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE job_id = ? AND cluster = ?`,
job.JobID, job.Cluster).Scan(&count)
// Verify the transferred job has the correct data
var jobID int64
var cluster string
err = r.DB.QueryRow(`SELECT job_id, cluster FROM job WHERE id = ?`, newID).Scan(&jobID, &cluster)
require.NoError(t, err)
assert.Equal(t, 0, count, "Job table should not be affected by StopCached")
assert.Equal(t, job.JobID, jobID)
assert.Equal(t, job.Cluster, cluster)
// Clean up
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", newID)
require.NoError(t, err)
})
}

View File

@@ -139,6 +139,13 @@ func nodeTestSetup(t *testing.T) {
}
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
if err := ResetConnection(); err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
ResetConnection()
})
Connect(dbfilepath)
if err := archive.Init(json.RawMessage(archiveCfg)); err != nil {

View File

@@ -6,6 +6,8 @@ package repository
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
@@ -148,8 +150,22 @@ func getContext(tb testing.TB) context.Context {
func setup(tb testing.TB) *JobRepository {
tb.Helper()
cclog.Init("warn", true)
dbfile := "testdata/job.db"
err := MigrateDB(dbfile)
// Copy test DB to a temp file for test isolation
srcData, err := os.ReadFile("testdata/job.db")
noErr(tb, err)
dbfile := filepath.Join(tb.TempDir(), "job.db")
err = os.WriteFile(dbfile, srcData, 0o644)
noErr(tb, err)
// Reset singletons so Connect uses the new temp DB
err = ResetConnection()
noErr(tb, err)
tb.Cleanup(func() {
ResetConnection()
})
err = MigrateDB(dbfile)
noErr(tb, err)
Connect(dbfile)
return GetJobRepository()

View File

@@ -25,17 +25,11 @@ func TestBuildJobStatsQuery(t *testing.T) {
func TestJobStats(t *testing.T) {
r := setup(t)
// First, count the actual jobs in the database (excluding test jobs)
var expectedCount int
err := r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE cluster != 'testcluster'`).Scan(&expectedCount)
err := r.DB.QueryRow(`SELECT COUNT(*) FROM job`).Scan(&expectedCount)
noErr(t, err)
filter := &model.JobFilter{}
// Exclude test jobs created by other tests
testCluster := "testcluster"
filter.Cluster = &model.StringInput{Neq: &testCluster}
stats, err := r.JobsStats(getContext(t), []*model.JobFilter{filter})
stats, err := r.JobsStats(getContext(t), []*model.JobFilter{})
noErr(t, err)
if stats[0].TotalJobs != expectedCount {

View File

@@ -31,8 +31,25 @@ func setupUserTest(t *testing.T) *UserCfgRepo {
}`
cclog.Init("info", true)
dbfilepath := "testdata/job.db"
err := MigrateDB(dbfilepath)
// Copy test DB to a temp file for test isolation
srcData, err := os.ReadFile("testdata/job.db")
if err != nil {
t.Fatal(err)
}
dbfilepath := filepath.Join(t.TempDir(), "job.db")
if err := os.WriteFile(dbfilepath, srcData, 0o644); err != nil {
t.Fatal(err)
}
if err := ResetConnection(); err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
ResetConnection()
})
err = MigrateDB(dbfilepath)
if err != nil {
t.Fatal(err)
}

View File

@@ -25,6 +25,7 @@ func initClusterConfig() error {
GlobalUserMetricList = []*schema.GlobalMetricListItem{}
NodeLists = map[string]map[string]NodeList{}
metricLookup := make(map[string]schema.GlobalMetricListItem)
userMetricLookup := make(map[string]schema.GlobalMetricListItem)
for _, c := range ar.GetClusters() {
@@ -62,11 +63,12 @@ func initClusterConfig() error {
if _, ok := metricLookup[mc.Name]; !ok {
metricLookup[mc.Name] = schema.GlobalMetricListItem{
Name: mc.Name, Scope: mc.Scope, Restrict: mc.Restrict, Unit: mc.Unit, Footprint: mc.Footprint,
Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint,
}
}
availability := schema.ClusterSupport{Cluster: cluster.Name}
userAvailability := schema.ClusterSupport{Cluster: cluster.Name}
scLookup := make(map[string]*schema.SubClusterConfig)
for _, scc := range mc.SubClusters {
@@ -94,6 +96,7 @@ func initClusterConfig() error {
newMetric.Footprint = mc.Footprint
}
isRestricted := mc.Restrict
if cfg, ok := scLookup[sc.Name]; ok {
if cfg.Remove {
continue
@@ -105,9 +108,13 @@ func initClusterConfig() error {
newMetric.Footprint = cfg.Footprint
newMetric.Energy = cfg.Energy
newMetric.LowerIsBetter = cfg.LowerIsBetter
isRestricted = cfg.Restrict
}
availability.SubClusters = append(availability.SubClusters, sc.Name)
if !isRestricted {
userAvailability.SubClusters = append(userAvailability.SubClusters, sc.Name)
}
sc.MetricConfig = append(sc.MetricConfig, newMetric)
if newMetric.Footprint != "" {
@@ -124,6 +131,17 @@ func initClusterConfig() error {
item := metricLookup[mc.Name]
item.Availability = append(item.Availability, availability)
metricLookup[mc.Name] = item
if len(userAvailability.SubClusters) > 0 {
userItem, ok := userMetricLookup[mc.Name]
if !ok {
userItem = schema.GlobalMetricListItem{
Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint,
}
}
userItem.Availability = append(userItem.Availability, userAvailability)
userMetricLookup[mc.Name] = userItem
}
}
Clusters = append(Clusters, cluster)
@@ -144,9 +162,9 @@ func initClusterConfig() error {
for _, metric := range metricLookup {
GlobalMetricList = append(GlobalMetricList, &metric)
if !metric.Restrict {
GlobalUserMetricList = append(GlobalUserMetricList, &metric)
}
for _, metric := range userMetricLookup {
GlobalUserMetricList = append(GlobalUserMetricList, &metric)
}
return nil

View File

@@ -16,7 +16,7 @@ CROSS_LOGIN_JWT_PUBLIC_KEY="+51iXX8BdLFocrppRxIw52xCOf8xFSH/eNilN5IHVGc="
Instructions
- `cd tools/convert-pem-pubkey-for-cc/`
- `cd tools/convert-pem-pubkey/`
- Insert your public ed25519 PEM key into `dummy.pub`
- `go run . dummy.pub`
- Copy the result into ClusterCockpit's `.env`

View File

@@ -1,11 +1,11 @@
# cc-frontend
[![Build](https://github.com/ClusterCockpit/cc-svelte-datatable/actions/workflows/build.yml/badge.svg)](https://github.com/ClusterCockpit/cc-svelte-datatable/actions/workflows/build.yml)
[![Build](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml/badge.svg)](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml)
A frontend for [ClusterCockpit](https://github.com/ClusterCockpit/ClusterCockpit) and [cc-backend](https://github.com/ClusterCockpit/cc-backend). Backend specific configuration can de done using the constants defined in the `intro` section in `./rollup.config.js`.
A frontend for [ClusterCockpit](https://github.com/ClusterCockpit/ClusterCockpit) and [cc-backend](https://github.com/ClusterCockpit/cc-backend). Backend specific configuration can be done using the constants defined in the `intro` section in `./rollup.config.mjs`.
Builds on:
* [Svelte](https://svelte.dev/)
* [Svelte 5](https://svelte.dev/)
* [SvelteStrap](https://sveltestrap.js.org/)
* [Bootstrap 5](https://getbootstrap.com/)
* [urql](https://github.com/FormidableLabs/urql)

View File

@@ -333,7 +333,18 @@
{:else if thisJob && $jobMetrics?.data?.scopedJobStats}
<!-- Note: Ignore '#snippet' Error in IDE -->
{#snippet gridContent(item)}
{#if item.data}
{#if item?.disabled}
<Card color="info" class="mt-2">
<CardHeader class="mb-0">
<b>Disabled Metric</b>
</CardHeader>
<CardBody>
<p>No dataset(s) returned for <b>{item.metric}</b></p>
<p class="mb-1">Metric has been disabled for subcluster <b>{thisJob.subCluster}</b>.</p>
<p class="mb-1">To remove this card, open metric selection, de-select the metric, and press "Close and Apply".</p>
</CardBody>
</Card>
{:else if item?.data}
<Metric
bind:this={plots[item.metric]}
job={thisJob}
@@ -343,16 +354,6 @@
presetScopes={item.data.map((x) => x.scope)}
isShared={thisJob.shared != "none"}
/>
{:else if item.disabled == true}
<Card color="info">
<CardHeader class="mb-0">
<b>Disabled Metric</b>
</CardHeader>
<CardBody>
<p>Metric <b>{item.metric}</b> is disabled for cluster <b>{thisJob.cluster}:{thisJob.subCluster}</b>.</p>
<p class="mb-1">To remove this card, open metric selection and press "Close and Apply".</p>
</CardBody>
</Card>
{:else}
<Card color="warning" class="mt-2">
<CardHeader class="mb-0">

View File

@@ -142,6 +142,8 @@
<Filters
bind:this={filterComponent}
{filterPresets}
shortJobQuickSelect
shortJobCutoff={ccconfig?.jobList_hideShortRunningJobs}
showFilter={!showCompare}
matchedJobs={showCompare? matchedCompareJobs: matchedListJobs}
applyFilters={(detail) => {

View File

@@ -119,7 +119,7 @@
const filter = $derived([
{ cluster: { eq: cluster } },
{ node: { contains: hostname } },
{ node: { eq: hostname } },
{ state: ["running"] },
]);
@@ -253,12 +253,15 @@
forNode
/>
{:else if item.disabled === true && item.metric}
<Card style="margin-left: 2rem;margin-right: 2rem;" body color="info"
>Metric disabled for subcluster <code
>{item.name}:{$nodeMetricsData.data.nodeMetrics[0]
.subCluster}</code
></Card
>
<Card color="info" class="mx-2">
<CardHeader class="mb-0">
<b>Disabled Metric</b>
</CardHeader>
<CardBody>
<p>No dataset(s) returned for <b>{item.name}</b></p>
<p class="mb-1">Metric has been disabled for subcluster <b>{$nodeMetricsData.data.nodeMetrics[0].subCluster}</b>.</p>
</CardBody>
</Card>
{:else}
<Card color="warning" class="mx-2">
<CardHeader class="mb-0">

View File

@@ -219,9 +219,10 @@
<Filters
bind:this={filterComponent}
{filterPresets}
shortJobQuickSelect
shortJobCutoff={ccconfig?.jobList_hideShortRunningJobs}
showFilter={!showCompare}
matchedJobs={showCompare? matchedCompareJobs: matchedListJobs}
startTimeQuickSelect
applyFilters={(detail) => {
jobFilters = [...detail.filters, { user: { eq: user.username } }];
selectedCluster = jobFilters[0]?.cluster

View File

@@ -6,6 +6,8 @@
- `filterPresets Object?`: Optional predefined filter values [Default: {}]
- `disableClusterSelection Bool?`: Is the selection disabled [Default: false]
- `startTimeQuickSelect Bool?`: Render startTime quick selections [Default: false]
- `shortJobQuickSelect Bool?`: Render short job quick selections [Default: false]
- `shortJobCutoff Int?`: Time in seconds for jobs to be considered short [Default: null]
- `matchedJobs Number?`: Number of jobs matching the filter [Default: -2]
- `showFilter Func`: If the filter component should be rendered in addition to total count info [Default: true]
- `applyFilters Func`: The callback function to apply current filter selection
@@ -25,6 +27,7 @@
ButtonGroup,
ButtonDropdown,
Icon,
Tooltip
} from "@sveltestrap/sveltestrap";
import Info from "./filters/InfoBox.svelte";
import Cluster from "./filters/Cluster.svelte";
@@ -36,6 +39,7 @@
import Resources from "./filters/Resources.svelte";
import Energy from "./filters/Energy.svelte";
import Statistics from "./filters/Stats.svelte";
import { formatDurationTime } from "./units.js";
/* Svelte 5 Props */
let {
@@ -43,6 +47,8 @@
filterPresets = {},
disableClusterSelection = false,
startTimeQuickSelect = false,
shortJobQuickSelect = false,
shortJobCutoff = 0,
matchedJobs = -2,
showFilter = true,
applyFilters
@@ -335,6 +341,44 @@
<DropdownItem onclick={() => (isStatsOpen = true)}>
<Icon name="bar-chart" onclick={() => (isStatsOpen = true)} /> Statistics
</DropdownItem>
{#if shortJobQuickSelect && shortJobCutoff > 0}
<DropdownItem divider />
<DropdownItem header>
Short Jobs Selection
<Icon id="shortjobsfilter-info" style="cursor:help; padding-right: 8px;" size="sm" name="info-circle"/>
<Tooltip target={`shortjobsfilter-info`} placement="right">
Job duration less than {formatDurationTime(shortJobCutoff)}
</Tooltip>
</DropdownItem>
<DropdownItem
onclick={() => {
filters.duration = {
moreThan: null,
lessThan: shortJobCutoff,
from: null,
to: null
}
updateFilters();
}}
>
<Icon name="stopwatch" />
Only Short Jobs
</DropdownItem>
<DropdownItem
onclick={() => {
filters.duration = {
moreThan: shortJobCutoff,
lessThan: null,
from: null,
to: null
}
updateFilters();
}}
>
<Icon name="stopwatch" />
Exclude Short Jobs
</DropdownItem>
{/if}
{#if startTimeQuickSelect}
<DropdownItem divider />
<DropdownItem header>Start Time Quick Selection</DropdownItem>

View File

@@ -112,11 +112,7 @@
// (Re-)query and optionally set new filters; Query will be started reactively.
export function queryJobs(filters) {
if (filters != null) {
let minRunningFor = ccconfig.jobList_hideShortRunningJobs;
if (minRunningFor && minRunningFor > 0) {
filters.push({ minRunningFor });
}
filter = filters;
filter = [...filters];
}
}

View File

@@ -180,10 +180,6 @@
// (Re-)query and optionally set new filters; Query will be started reactively.
export function queryJobs(filters) {
if (filters != null) {
let minRunningFor = ccconfig.jobList_hideShortRunningJobs;
if (minRunningFor && minRunningFor > 0) {
filters.push({ minRunningFor });
}
filter = [...filters];
}
};

View File

@@ -14,10 +14,10 @@
<script module>
export const startTimeSelectOptions = [
{ range: "", rangeLabel: "No Selection"},
{ range: "last6h", rangeLabel: "Last 6hrs"},
{ range: "last24h", rangeLabel: "Last 24hrs"},
{ range: "last7d", rangeLabel: "Last 7 days"},
{ range: "last30d", rangeLabel: "Last 30 days"}
{ range: "last6h", rangeLabel: "Job Start: Last 6hrs"},
{ range: "last24h", rangeLabel: "Job Start: Last 24hrs"},
{ range: "last7d", rangeLabel: "Job Start: Last 7 days"},
{ range: "last30d", rangeLabel: "Job Start: Last 30 days"}
];
</script>

View File

@@ -99,7 +99,7 @@
})
);
const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope($metricsQuery.data.jobMetrics) : []);
const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope(metrics, $metricsQuery.data.jobMetrics) : []);
/* Effects */
$effect(() => {
@@ -140,6 +140,26 @@
});
}
function sortAndSelectScope(metricList = [], jobMetrics = []) {
const pendingData = [];
metricList.forEach((metricName) => {
const pendingMetric = {
name: metricName,
disabled: checkMetricDisabled(
globalMetrics,
metricName,
job.cluster,
job.subCluster,
),
data: null
};
const scopesData = jobMetrics.filter((jobMetric) => jobMetric.name == metricName)
if (scopesData.length > 0) pendingMetric.data = selectScope(scopesData)
pendingData.push(pendingMetric)
});
return pendingData;
};
const selectScope = (jobMetrics) =>
jobMetrics.reduce(
(a, b) =>
@@ -152,30 +172,6 @@
: a,
jobMetrics[0],
);
const sortAndSelectScope = (jobMetrics) =>
metrics
.map((name) => jobMetrics.filter((jobMetric) => jobMetric.name == name))
.map((jobMetrics) => ({
disabled: false,
data: jobMetrics.length > 0 ? selectScope(jobMetrics) : null,
}))
.map((jobMetric) => {
if (jobMetric.data) {
return {
name: jobMetric.data.name,
disabled: checkMetricDisabled(
globalMetrics,
jobMetric.data.name,
job.cluster,
job.subCluster,
),
data: jobMetric.data,
};
} else {
return jobMetric;
}
});
</script>
<tr>
@@ -211,13 +207,12 @@
{/if}
{#each refinedData as metric, i (metric?.name || i)}
<td>
{#key metric}
{#if metric?.data}
{#if metric?.disabled}
<Card body class="mx-2" color="info">
Metric <b>{metric.data.name}</b>: Disabled for subcluster <code>{job.subCluster}</code>
<p>No dataset(s) returned for <b>{metrics[i]}</b></p>
<p class="mb-1">Metric has been disabled for subcluster <b>{job.subCluster}</b>.</p>
</Card>
{:else}
{:else if metric?.data}
<MetricPlot
onZoom={(detail) => handleZoom(detail, metric.data.name)}
height={plotHeight}
@@ -234,7 +229,6 @@
zoomState={zoomStates[metric.data.name] || null}
thresholdState={thresholdStates[metric.data.name] || null}
/>
{/if}
{:else}
<Card body class="mx-2" color="warning">
<p>No dataset(s) returned for <b>{metrics[i]}</b></p>
@@ -243,7 +237,6 @@
<p class="mb-1">Identical messages in <i>job {job.jobId} row</i>: Host not found.</p>
</Card>
{/if}
{/key}
</td>
{:else}
<td>

View File

@@ -88,6 +88,7 @@
function printAvailability(metric, cluster) {
const avail = globalMetrics.find((gm) => gm.name === metric)?.availability
if (avail) {
if (!cluster) {
return avail.map((av) => av.cluster).join(', ')
} else {
@@ -99,6 +100,8 @@
}
}
}
return ""
}
function columnsDragOver(event) {
event.preventDefault();

View File

@@ -120,7 +120,6 @@
data: h.metrics.filter(
(m) => m?.name == selectedMetric && m.scope == "node",
),
// TODO: Move To New Func Variant With Disabled Check on WHole Cluster Level: This never Triggers!
disabled: checkMetricDisabled(globalMetrics, selectedMetric, cluster, h.subCluster),
}))
.sort((a, b) => a.host.localeCompare(b.host))
@@ -162,16 +161,18 @@
</Badge>
</span>
</div>
{#if item?.data}
{#if item.disabled === true}
<!-- TODO: Will never be Shown: Overview Single Metric Return Will be Null, see Else Case-->
<Card body class="mx-3" color="info"
>Metric disabled for subcluster <code
>{selectedMetric}:{item.subCluster}</code
></Card
>
{:else if item.disabled === false}
<!-- "No Data"-Warning included in MetricPlot-Component -->
{#if item?.disabled}
<Card color="info">
<CardHeader class="mb-0">
<b>Disabled Metric</b>
</CardHeader>
<CardBody>
<p>No dataset(s) returned for <b>{selectedMetric}</b></p>
<p class="mb-1">Metric has been disabled for subcluster <b>{item.subCluster}</b>.</p>
</CardBody>
</Card>
{:else if item?.data}
<!-- "Empty Series"-Warning included in MetricPlot-Component -->
<!-- #key: X-axis keeps last selected timerange otherwise -->
{#key item.data[0].metric.series[0].data.length}
<MetricPlot
@@ -185,12 +186,7 @@
/>
{/key}
{:else}
<Card body class="mx-3" color="info">
Global Metric List Not Initialized
Can not determine {selectedMetric} availability: Please Reload Page
</Card>
{/if}
{:else}
<!-- Should Not Appear -->
<Card color="warning">
<CardHeader class="mb-0">
<b>Missing Metric</b>
@@ -205,10 +201,22 @@
{/each}
{/key}
</Row>
{:else if hostnameFilter || hoststateFilter != 'all'}
<Row class="mx-1">
<Card class="px-0">
<CardHeader>
<b>Empty Filter Return</b>
</CardHeader>
<CardBody>
<p>No datasets returned for <b>{selectedMetric}</b>.</p>
<p class="mb-1">Hostname filter and/or host state filter returned no matches.</p>
</CardBody>
</Card>
</Row>
{:else}
<Row>
<Card color="warning">
<CardHeader class="mb-0">
<Row class="mx-1">
<Card class="px-0" color="warning">
<CardHeader>
<b>Missing Metric</b>
</CardHeader>
<CardBody>

View File

@@ -72,10 +72,30 @@
);
const extendedLegendData = $derived($nodeJobsData?.data ? buildExtendedLegend() : null);
const refinedData = $derived(nodeData?.metrics ? sortAndSelectScope(nodeData.metrics) : []);
const refinedData = $derived(nodeData?.metrics ? sortAndSelectScope(selectedMetrics, nodeData.metrics) : []);
const dataHealth = $derived(refinedData.filter((rd) => rd.disabled === false).map((enabled) => (enabled?.data?.metric?.series?.length > 0)));
/* Functions */
function sortAndSelectScope(metricList = [], nodeMetrics = []) {
const pendingData = [];
metricList.forEach((metricName) => {
const pendingMetric = {
name: metricName,
disabled: checkMetricDisabled(
globalMetrics,
metricName,
cluster,
nodeData.subCluster,
),
data: null
};
const scopesData = nodeMetrics.filter((nodeMetric) => nodeMetric.name == metricName)
if (scopesData.length > 0) pendingMetric.data = selectScope(scopesData)
pendingData.push(pendingMetric)
});
return pendingData;
};
const selectScope = (nodeMetrics) =>
nodeMetrics.reduce(
(a, b) =>
@@ -83,29 +103,6 @@
nodeMetrics[0],
);
const sortAndSelectScope = (allNodeMetrics) =>
selectedMetrics
.map((selectedName) => allNodeMetrics.filter((nodeMetric) => nodeMetric.name == selectedName))
.map((matchedNodeMetrics) => ({
disabled: false,
data: matchedNodeMetrics.length > 0 ? selectScope(matchedNodeMetrics) : null,
}))
.map((scopedNodeMetric) => {
if (scopedNodeMetric?.data) {
return {
disabled: checkMetricDisabled(
globalMetrics,
scopedNodeMetric.data.name,
cluster,
nodeData.subCluster,
),
data: scopedNodeMetric.data,
};
} else {
return scopedNodeMetric;
}
});
function buildExtendedLegend() {
let pendingExtendedLegendData = null
// Build Extended for allocated nodes [Commented: Only Build extended Legend For Shared Nodes]
@@ -171,25 +168,17 @@
{/if}
</td>
{#each refinedData as metricData, i (metricData?.data?.name || i)}
{#key metricData}
<td>
{#if metricData?.disabled}
<Card body class="mx-2" color="info"
>Metric <b>{selectedMetrics[i]}</b> disabled for subcluster <code
>{nodeData.subCluster}</code
></Card
>
<Card body class="mx-2" color="info">
<p>No dataset(s) returned for <b>{selectedMetrics[i]}</b></p>
<p class="mb-1">Metric has been disabled for subcluster <b>{nodeData.subCluster}</b>.</p>
</Card>
{:else if !metricData?.data}
<Card body class="mx-2" color="warning">
<p>No dataset(s) returned for <b>{selectedMetrics[i]}</b></p>
<p class="mb-1">Metric was not found in metric store for cluster <b>{cluster}</b>.</p>
</Card>
{:else if !metricData?.data?.name}
<Card body class="mx-2" color="warning"
>Metric without name for subcluster <code
>{`Metric Index ${i}`}:{nodeData.subCluster}</code
></Card
>
{:else if !!metricData.data?.metric.statisticsSeries}
<!-- "No Data"-Warning included in MetricPlot-Component -->
<MetricPlot
@@ -233,6 +222,5 @@
/>
{/if}
</td>
{/key}
{/each}
</tr>