Merge branch 'master' into 40_45_82_update_roles

This commit is contained in:
Christoph Kluge
2023-02-21 17:17:41 +01:00
66 changed files with 3132 additions and 826 deletions

View File

@@ -135,7 +135,7 @@ type ApiTag struct {
type TagJobApiRequest []*ApiTag
func handleError(err error, statusCode int, rw http.ResponseWriter) {
log.Warnf("REST API: %s", err.Error())
log.Warnf("REST ERROR : %s", err.Error())
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(statusCode)
json.NewEncoder(rw).Encode(ErrorResponse{
@@ -170,7 +170,7 @@ func decode(r io.Reader, val interface{}) error {
// @router /jobs/ [get]
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -301,7 +301,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/tag_job/{id} [post]
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -366,7 +366,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/start_job/ [post]
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -447,7 +447,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/stop_job/{id} [post]
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -500,7 +500,7 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/stop_job/ [post]
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -546,7 +546,7 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/delete_job/{id} [delete]
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -594,7 +594,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
// @router /jobs/delete_job/ [delete]
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -650,7 +650,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
// @router /jobs/delete_job_before/{ts} [delete]
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
return
}
@@ -725,7 +725,7 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
// func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
// if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
// handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
// handleError(fmt.Errorf("missing role: %v", auth.RoleApi), http.StatusForbidden, rw)
// return
// }
@@ -794,7 +794,7 @@ func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
me := auth.GetUser(r.Context())
if !me.HasRole(auth.RoleAdmin) {
if username != me.Username {
http.Error(rw, "only admins are allowed to sign JWTs not for themselves", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to sign JWTs not for themselves", http.StatusForbidden)
return
}
}
@@ -819,13 +819,13 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
rw.Header().Set("Content-Type", "text/plain")
me := auth.GetUser(r.Context())
if !me.HasRole(auth.RoleAdmin) {
http.Error(rw, "only admins are allowed to create new users", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to create new users", http.StatusForbidden)
return
}
username, password, role, name, email, project := r.FormValue("username"), r.FormValue("password"), r.FormValue("role"), r.FormValue("name"), r.FormValue("email"), r.FormValue("project")
if len(password) == 0 && role != auth.RoleApi {
http.Error(rw, "only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
http.Error(rw, "Only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
return
}
@@ -848,12 +848,12 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
return
}
rw.Write([]byte(fmt.Sprintf("User %#v successfully created!\n", username)))
rw.Write([]byte(fmt.Sprintf("User %v successfully created!\n", username)))
}
func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
http.Error(rw, "only admins are allowed to delete a user", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to delete a user", http.StatusForbidden)
return
}
@@ -868,7 +868,7 @@ func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
http.Error(rw, "only admins are allowed to fetch a list of users", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to fetch a list of users", http.StatusForbidden)
return
}
@@ -899,7 +899,7 @@ func (api *RestApi) getRoles(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
http.Error(rw, "only admins are allowed to update a user", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to update a user", http.StatusForbidden)
return
}
@@ -943,7 +943,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
rw.Header().Set("Content-Type", "text/plain")
key, value := r.FormValue("key"), r.FormValue("value")
fmt.Printf("KEY: %#v\nVALUE: %#v\n", key, value)
fmt.Printf("REST > KEY: %#v\nVALUE: %#v\n", key, value)
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, auth.GetUser(r.Context())); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
@@ -955,7 +955,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
if api.MachineStateDir == "" {
http.Error(rw, "not enabled", http.StatusNotFound)
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
return
}
@@ -986,7 +986,7 @@ func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) getMachineState(rw http.ResponseWriter, r *http.Request) {
if api.MachineStateDir == "" {
http.Error(rw, "not enabled", http.StatusNotFound)
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
return
}

View File

@@ -185,30 +185,20 @@ func Init(db *sqlx.DB,
configs map[string]interface{}) (*Authentication, error) {
auth := &Authentication{}
auth.db = db
_, err := db.Exec(`
CREATE TABLE IF NOT EXISTS user (
username varchar(255) PRIMARY KEY NOT NULL,
password varchar(255) DEFAULT NULL,
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
name varchar(255) DEFAULT NULL,
roles varchar(255) NOT NULL DEFAULT "[]",
email varchar(255) DEFAULT NULL,
projects varchar(255) NOT NULL DEFAULT "[]");`)
if err != nil {
return nil, err
}
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Error("Error while initializing authentication -> decoding session key failed")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
@@ -216,12 +206,14 @@ func Init(db *sqlx.DB,
auth.LocalAuth = &LocalAuthenticator{}
if err := auth.LocalAuth.Init(auth, nil); err != nil {
log.Error("Error while initializing authentication -> localAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
auth.JwtAuth = &JWTAuthenticator{}
if err := auth.JwtAuth.Init(auth, configs["jwt"]); err != nil {
log.Error("Error while initializing authentication -> jwtAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.JwtAuth)
@@ -229,6 +221,7 @@ func Init(db *sqlx.DB,
if config, ok := configs["ldap"]; ok {
auth.LdapAuth = &LdapAuthenticator{}
if err := auth.LdapAuth.Init(auth, config); err != nil {
log.Error("Error while initializing authentication -> ldapAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
@@ -243,6 +236,7 @@ func (auth *Authentication) AuthViaSession(
session, err := auth.sessionStore.Get(r, "session")
if err != nil {
log.Error("Error while getting session store")
return nil, err
}
@@ -272,7 +266,7 @@ func (auth *Authentication) Login(
user := (*User)(nil)
if username != "" {
if user, _ = auth.GetUser(username); err != nil {
// log.Warnf("login of unkown user %#v", username)
// log.Warnf("login of unkown user %v", username)
_ = err
}
}
@@ -284,7 +278,7 @@ func (auth *Authentication) Login(
user, err = authenticator.Login(user, rw, r)
if err != nil {
log.Warnf("login failed: %s", err.Error())
log.Warnf("user '%s' login failed: %s", user.Username, err.Error())
onfailure(rw, r, err)
return
}
@@ -303,7 +297,7 @@ func (auth *Authentication) Login(
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
if err := auth.sessionStore.Save(r, rw, session); err != nil {
log.Errorf("session save failed: %s", err.Error())
log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}

View File

@@ -45,11 +45,13 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
} else {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
log.Warn("Could not decode JWT public key")
return err
}
ja.publicKey = ed25519.PublicKey(bytes)
bytes, err = base64.StdEncoding.DecodeString(privKey)
if err != nil {
log.Warn("Could not decode JWT private key")
return err
}
ja.privateKey = ed25519.PrivateKey(bytes)
@@ -58,6 +60,7 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
if pubKey = os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
log.Warn("Could not decode cross login JWT HS512 key")
return err
}
ja.loginTokenKey = bytes
@@ -68,6 +71,7 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
if keyFound && pubKeyCrossLogin != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
if err != nil {
log.Warn("Could not decode cross login JWT public key")
return err
}
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
@@ -123,13 +127,15 @@ func (ja *JWTAuthenticator) Login(
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
return ja.loginTokenKey, nil
}
return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
return nil, fmt.Errorf("AUTH/JWT > unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
})
if err != nil {
log.Warn("Error while parsing jwt token")
return nil, err
}
if err := token.Claims.Valid(); err != nil {
log.Warn("jwt token claims are not valid")
return nil, err
}
@@ -151,6 +157,7 @@ func (ja *JWTAuthenticator) Login(
if user == nil {
user, err = ja.auth.GetUser(sub)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%v'", sub)
return nil, err
} else if user == nil {
user = &User{
@@ -159,6 +166,7 @@ func (ja *JWTAuthenticator) Login(
AuthSource: AuthViaToken,
}
if err := ja.auth.AddUser(user); err != nil {
log.Errorf("Error while adding user '%v' to auth from token", user.Username)
return nil, err
}
}
@@ -223,11 +231,13 @@ func (ja *JWTAuthenticator) Auth(
return ja.publicKey, nil
})
if err != nil {
log.Warn("Error while parsing token")
return nil, err
}
// Check token validity
if err := token.Claims.Valid(); err != nil {
log.Warn("jwt token claims are not valid")
return nil, err
}
@@ -276,7 +286,7 @@ func (ja *JWTAuthenticator) Auth(
session.Values["roles"] = roles
if err := ja.auth.sessionStore.Save(r, rw, session); err != nil {
log.Errorf("session save failed: %s", err.Error())
log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return nil, err
}

View File

@@ -39,21 +39,23 @@ func (la *LdapAuthenticator) Init(
if la.config != nil && la.config.SyncInterval != "" {
interval, err := time.ParseDuration(la.config.SyncInterval)
if err != nil {
log.Warnf("Could not parse duration for sync interval: %v", la.config.SyncInterval)
return err
}
if interval == 0 {
log.Info("Sync interval is zero")
return nil
}
go func() {
ticker := time.NewTicker(interval)
for t := range ticker.C {
log.Printf("LDAP sync started at %s", t.Format(time.RFC3339))
log.Printf("sync started at %s", t.Format(time.RFC3339))
if err := la.Sync(); err != nil {
log.Errorf("LDAP sync failed: %s", err.Error())
log.Errorf("sync failed: %s", err.Error())
}
log.Print("LDAP sync done")
log.Print("sync done")
}
}()
}
@@ -76,12 +78,14 @@ func (la *LdapAuthenticator) Login(
l, err := la.getLdapConnection(false)
if err != nil {
log.Warn("Error while getting ldap connection")
return nil, err
}
defer l.Close()
userDn := strings.Replace(la.config.UserBind, "{username}", user.Username, -1)
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
log.Error("Error while binding to ldap connection")
return nil, err
}
@@ -104,12 +108,14 @@ func (la *LdapAuthenticator) Sync() error {
users := map[string]int{}
rows, err := la.auth.db.Query(`SELECT username FROM user WHERE user.ldap = 1`)
if err != nil {
log.Warn("Error while querying LDAP users")
return err
}
for rows.Next() {
var username string
if err := rows.Scan(&username); err != nil {
log.Warnf("Error while scanning for user '%s'", username)
return err
}
@@ -118,6 +124,7 @@ func (la *LdapAuthenticator) Sync() error {
l, err := la.getLdapConnection(true)
if err != nil {
log.Error("LDAP connection error")
return err
}
defer l.Close()
@@ -126,6 +133,7 @@ func (la *LdapAuthenticator) Sync() error {
la.config.UserBase, ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
la.config.UserFilter, []string{"dn", "uid", "gecos"}, nil))
if err != nil {
log.Warn("LDAP search error")
return err
}
@@ -147,15 +155,17 @@ func (la *LdapAuthenticator) Sync() error {
for username, where := range users {
if where == IN_DB && la.config.SyncDelOldUsers {
log.Debugf("ldap-sync: remove %#v (does not show up in LDAP anymore)", username)
log.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
if _, err := la.auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username); err != nil {
log.Errorf("User '%s' not in LDAP anymore: Delete from DB failed", username)
return err
}
} else if where == IN_LDAP {
name := newnames[username]
log.Debugf("ldap-sync: add %#v (name: %#v, roles: [user], ldap: true)", username, name)
log.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
if _, err := la.auth.db.Exec(`INSERT INTO user (username, ldap, name, roles) VALUES (?, ?, ?, ?)`,
username, 1, name, "[\""+RoleUser+"\"]"); err != nil {
log.Errorf("User '%s' new in LDAP: Insert into DB failed", username)
return err
}
}
@@ -170,12 +180,14 @@ func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
conn, err := ldap.DialURL(la.config.Url)
if err != nil {
log.Warn("LDAP URL dial failed")
return nil, err
}
if admin {
if err := conn.Bind(la.config.SearchDN, la.syncPassword); err != nil {
conn.Close()
log.Warn("LDAP connection bind failed")
return nil, err
}
}

View File

@@ -39,7 +39,7 @@ func (la *LocalAuthenticator) Login(
r *http.Request) (*User, error) {
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil {
return nil, fmt.Errorf("user '%s' provided the wrong password (%w)", user.Username, e)
return nil, fmt.Errorf("AUTH/LOCAL > user '%s' provided the wrong password (%w)", user.Username, e)
}
return user, nil

View File

@@ -25,6 +25,7 @@ func (auth *Authentication) GetUser(username string) (*User, error) {
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
Where("user.username = ?", username).RunWith(auth.db).
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
log.Warnf("Error while querying user '%v' from database", username)
return nil, err
}
@@ -33,6 +34,7 @@ func (auth *Authentication) GetUser(username string) (*User, error) {
user.Email = email.String
if rawRoles.Valid {
if err := json.Unmarshal([]byte(rawRoles.String), &user.Roles); err != nil {
log.Warn("Error while unmarshaling raw roles from DB")
return nil, err
}
}
@@ -64,6 +66,7 @@ func (auth *Authentication) AddUser(user *User) error {
if user.Password != "" {
password, err := bcrypt.GenerateFromPassword([]byte(user.Password), bcrypt.DefaultCost)
if err != nil {
log.Error("Error while encrypting new user password")
return err
}
cols = append(cols, "password")
@@ -71,6 +74,7 @@ func (auth *Authentication) AddUser(user *User) error {
}
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
return err
}
@@ -81,6 +85,7 @@ func (auth *Authentication) AddUser(user *User) error {
func (auth *Authentication) DelUser(username string) error {
_, err := auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username)
log.Errorf("Error while deleting user '%s' from DB", username)
return err
}
@@ -93,6 +98,7 @@ func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
rows, err := q.RunWith(auth.db).Query()
if err != nil {
log.Warn("Error while querying user list")
return nil, err
}
@@ -104,10 +110,12 @@ func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
user := &User{}
var name, email sql.NullString
if err := rows.Scan(&user.Username, &name, &email, &rawroles, &rawprojects); err != nil {
log.Warn("Error while scanning user list")
return nil, err
}
if err := json.Unmarshal([]byte(rawroles), &user.Roles); err != nil {
log.Warn("Error while unmarshaling raw role list")
return nil, err
}
@@ -129,11 +137,12 @@ func (auth *Authentication) AddRole(
user, err := auth.GetUser(username)
if err != nil {
log.Warnf("Could not load user '%s'", username)
return err
}
if !IsValidRole(role) {
return fmt.Errorf("invalid user role: %#v", role)
return fmt.Errorf("Invalid user role: %v", role)
}
if user.HasRole(role) {
@@ -142,6 +151,7 @@ func (auth *Authentication) AddRole(
roles, _ := json.Marshal(append(user.Roles, role))
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while adding new role for user '%s'", user.Username)
return err
}
return nil
@@ -150,11 +160,12 @@ func (auth *Authentication) AddRole(
func (auth *Authentication) RemoveRole(ctx context.Context, username string, role string) error {
user, err := auth.GetUser(username)
if err != nil {
log.Warnf("Could not load user '%s'", username)
return err
}
if !IsValidRole(role) {
return fmt.Errorf("invalid user role: %#v", role)
return fmt.Errorf("Invalid user role: %#v", role)
}
if role == RoleManager && len(user.Projects) != 0 {
@@ -174,11 +185,12 @@ func (auth *Authentication) RemoveRole(ctx context.Context, username string, rol
if exists == true {
var mroles, _ = json.Marshal(newroles)
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while removing role for user '%s'", user.Username)
return err
}
return nil
} else {
return fmt.Errorf("user %#v already does not have role %#v", username, role)
return fmt.Errorf("User '%v' already does not have role: %v", username, role)
}
}
@@ -259,9 +271,13 @@ func FetchUser(ctx context.Context, db *sqlx.DB, username string) (*model.User,
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
RunWith(db).QueryRow().Scan(&name, &email); err != nil {
if err == sql.ErrNoRows {
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */
/* since FetchUser will be called to retrieve full name and mail for every job in query/list */
// log.Warnf("User '%s' Not found in DB", username)
return nil, nil
}
log.Warnf("Error while fetching user '%s'", username)
return nil, err
}

View File

@@ -49,7 +49,7 @@ func Init(flagConfigFile string) {
raw, err := os.ReadFile(flagConfigFile)
if err != nil {
if !os.IsNotExist(err) {
log.Fatal(err)
log.Fatalf("CONFIG ERROR: %v", err)
}
} else {
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
@@ -58,7 +58,7 @@ func Init(flagConfigFile string) {
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
log.Fatal(err)
log.Fatalf("could not decode: %v", err)
}
if Keys.Clusters == nil || len(Keys.Clusters) < 1 {

View File

@@ -19,7 +19,7 @@ func TestInit(t *testing.T) {
func TestInitMinimal(t *testing.T) {
fp := "../../docs/config.json"
Init(fp)
if Keys.Addr != "0.0.0.0:8080" {
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:8080", Keys.Addr)
if Keys.Addr != "127.0.0.1:8080" {
t.Errorf("wrong addr\ngot: %s \nwant: 127.0.0.1:8080", Keys.Addr)
}
}

View File

@@ -88,6 +88,7 @@ type ComplexityRoot struct {
Exclusive func(childComplexity int) int
ID func(childComplexity int) int
JobID func(childComplexity int) int
JobName func(childComplexity int) int
MetaData func(childComplexity int) int
MonitoringStatus func(childComplexity int) int
NumAcc func(childComplexity int) int
@@ -130,6 +131,7 @@ type ComplexityRoot struct {
HistDuration func(childComplexity int) int
HistNumNodes func(childComplexity int) int
ID func(childComplexity int) int
Name func(childComplexity int) int
ShortJobs func(childComplexity int) int
TotalCoreHours func(childComplexity int) int
TotalJobs func(childComplexity int) int
@@ -263,6 +265,8 @@ type ClusterResolver interface {
Partitions(ctx context.Context, obj *schema.Cluster) ([]string, error)
}
type JobResolver interface {
JobName(ctx context.Context, obj *schema.Job) (*string, error)
Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)
MetaData(ctx context.Context, obj *schema.Job) (interface{}, error)
@@ -452,6 +456,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Job.JobID(childComplexity), true
case "Job.jobName":
if e.complexity.Job.JobName == nil {
break
}
return e.complexity.Job.JobName(childComplexity), true
case "Job.metaData":
if e.complexity.Job.MetaData == nil {
break
@@ -662,6 +673,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.JobsStatistics.ID(childComplexity), true
case "JobsStatistics.name":
if e.complexity.JobsStatistics.Name == nil {
break
}
return e.complexity.JobsStatistics.Name(childComplexity), true
case "JobsStatistics.shortJobs":
if e.complexity.JobsStatistics.ShortJobs == nil {
break
@@ -1391,6 +1409,7 @@ type Job {
jobId: Int!
user: String!
project: String!
jobName: String
cluster: String!
subCluster: String!
startTime: Time!
@@ -1578,14 +1597,15 @@ type IntRangeOutput { from: Int!, to: Int! }
type TimeRangeOutput { from: Time!, to: Time! }
input JobFilter {
tags: [ID!]
jobId: StringInput
arrayJobId: Int
user: StringInput
project: StringInput
cluster: StringInput
partition: StringInput
duration: IntRange
tags: [ID!]
jobId: StringInput
arrayJobId: Int
user: StringInput
project: StringInput
jobName: StringInput
cluster: StringInput
partition: StringInput
duration: IntRange
minRunningFor: Int
@@ -1616,6 +1636,7 @@ input StringInput {
contains: String
startsWith: String
endsWith: String
in: [String!]
}
input IntRange { from: Int!, to: Int! }
@@ -1636,6 +1657,7 @@ type HistoPoint {
type JobsStatistics {
id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster
name: String # if User-Statistics: Given Name of Account (ID) Owner
totalJobs: Int! # Number of jobs that matched
shortJobs: Int! # Number of jobs with a duration of less than 2 minutes
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
@@ -3038,6 +3060,47 @@ func (ec *executionContext) fieldContext_Job_project(ctx context.Context, field
return fc, nil
}
func (ec *executionContext) _Job_jobName(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_jobName(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Job().JobName(rctx, obj)
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(*string)
fc.Result = res
return ec.marshalOString2ᚖstring(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Job_jobName(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Job",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type String does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _Job_cluster(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_cluster(ctx, field)
if err != nil {
@@ -4231,6 +4294,8 @@ func (ec *executionContext) fieldContext_JobResultList_items(ctx context.Context
return ec.fieldContext_Job_user(ctx, field)
case "project":
return ec.fieldContext_Job_project(ctx, field)
case "jobName":
return ec.fieldContext_Job_jobName(ctx, field)
case "cluster":
return ec.fieldContext_Job_cluster(ctx, field)
case "subCluster":
@@ -4441,6 +4506,47 @@ func (ec *executionContext) fieldContext_JobsStatistics_id(ctx context.Context,
return fc, nil
}
func (ec *executionContext) _JobsStatistics_name(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_JobsStatistics_name(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return obj.Name, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(*string)
fc.Result = res
return ec.marshalOString2ᚖstring(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_JobsStatistics_name(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "JobsStatistics",
Field: field,
IsMethod: false,
IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type String does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _JobsStatistics_totalJobs(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
if err != nil {
@@ -6078,6 +6184,8 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
return ec.fieldContext_Job_user(ctx, field)
case "project":
return ec.fieldContext_Job_project(ctx, field)
case "jobName":
return ec.fieldContext_Job_jobName(ctx, field)
case "cluster":
return ec.fieldContext_Job_cluster(ctx, field)
case "subCluster":
@@ -6357,6 +6465,8 @@ func (ec *executionContext) fieldContext_Query_jobsStatistics(ctx context.Contex
switch field.Name {
case "id":
return ec.fieldContext_JobsStatistics_id(ctx, field)
case "name":
return ec.fieldContext_JobsStatistics_name(ctx, field)
case "totalJobs":
return ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
case "shortJobs":
@@ -10389,7 +10499,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj int
asMap[k] = v
}
fieldsInOrder := [...]string{"tags", "jobId", "arrayJobId", "user", "project", "cluster", "partition", "duration", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "flopsAnyAvg", "memBwAvg", "loadAvg", "memUsedMax"}
fieldsInOrder := [...]string{"tags", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "flopsAnyAvg", "memBwAvg", "loadAvg", "memUsedMax"}
for _, k := range fieldsInOrder {
v, ok := asMap[k]
if !ok {
@@ -10436,6 +10546,14 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj int
if err != nil {
return it, err
}
case "jobName":
var err error
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("jobName"))
it.JobName, err = ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
if err != nil {
return it, err
}
case "cluster":
var err error
@@ -10629,7 +10747,7 @@ func (ec *executionContext) unmarshalInputStringInput(ctx context.Context, obj i
asMap[k] = v
}
fieldsInOrder := [...]string{"eq", "contains", "startsWith", "endsWith"}
fieldsInOrder := [...]string{"eq", "contains", "startsWith", "endsWith", "in"}
for _, k := range fieldsInOrder {
v, ok := asMap[k]
if !ok {
@@ -10668,6 +10786,14 @@ func (ec *executionContext) unmarshalInputStringInput(ctx context.Context, obj i
if err != nil {
return it, err
}
case "in":
var err error
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("in"))
it.In, err = ec.unmarshalOString2ᚕstringᚄ(ctx, v)
if err != nil {
return it, err
}
}
}
@@ -11000,6 +11126,23 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
if out.Values[i] == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
case "jobName":
field := field
innerFunc := func(ctx context.Context) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Job_jobName(ctx, field, obj)
return res
}
out.Concurrently(i, func() graphql.Marshaler {
return innerFunc(ctx)
})
case "cluster":
out.Values[i] = ec._Job_cluster(ctx, field, obj)
@@ -11312,6 +11455,10 @@ func (ec *executionContext) _JobsStatistics(ctx context.Context, sel ast.Selecti
if out.Values[i] == graphql.Null {
invalids++
}
case "name":
out.Values[i] = ec._JobsStatistics_name(ctx, field, obj)
case "totalJobs":
out.Values[i] = ec._JobsStatistics_totalJobs(ctx, field, obj)

View File

@@ -42,6 +42,7 @@ type JobFilter struct {
ArrayJobID *int `json:"arrayJobId"`
User *StringInput `json:"user"`
Project *StringInput `json:"project"`
JobName *StringInput `json:"jobName"`
Cluster *StringInput `json:"cluster"`
Partition *StringInput `json:"partition"`
Duration *schema.IntRange `json:"duration"`
@@ -71,6 +72,7 @@ type JobResultList struct {
type JobsStatistics struct {
ID string `json:"id"`
Name *string `json:"name"`
TotalJobs int `json:"totalJobs"`
ShortJobs int `json:"shortJobs"`
TotalWalltime int `json:"totalWalltime"`
@@ -101,10 +103,11 @@ type PageRequest struct {
}
type StringInput struct {
Eq *string `json:"eq"`
Contains *string `json:"contains"`
StartsWith *string `json:"startsWith"`
EndsWith *string `json:"endsWith"`
Eq *string `json:"eq"`
Contains *string `json:"contains"`
StartsWith *string `json:"startsWith"`
EndsWith *string `json:"endsWith"`
In []string `json:"in"`
}
type TimeRangeOutput struct {

View File

@@ -16,6 +16,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@@ -24,6 +25,11 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
return r.Repo.Partitions(obj.Name)
}
// JobName is the resolver for the jobName field.
func (r *jobResolver) JobName(ctx context.Context, obj *schema.Job) (*string, error) {
return r.Repo.FetchJobName(obj)
}
// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(&obj.ID)
@@ -43,6 +49,7 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name)
if err != nil {
log.Warn("Error while creating tag")
return nil, err
}
@@ -58,6 +65,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while adding tag to job")
return nil, err
}
@@ -65,10 +73,12 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
for _, tagId := range tagIds {
tid, err := strconv.ParseInt(tagId, 10, 64)
if err != nil {
log.Warn("Error while parsing tag id")
return nil, err
}
if tags, err = r.Repo.AddTag(jid, tid); err != nil {
log.Warn("Error while adding tag")
return nil, err
}
}
@@ -80,6 +90,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while parsing job id")
return nil, err
}
@@ -87,10 +98,12 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
for _, tagId := range tagIds {
tid, err := strconv.ParseInt(tagId, 10, 64)
if err != nil {
log.Warn("Error while parsing tag id")
return nil, err
}
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
log.Warn("Error while removing tag")
return nil, err
}
}
@@ -101,6 +114,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
// UpdateConfiguration is the resolver for the updateConfiguration field.
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, auth.GetUser(ctx)); err != nil {
log.Warn("Error while updating user config")
return nil, err
}
@@ -126,6 +140,7 @@ func (r *queryResolver) User(ctx context.Context, username string) (*model.User,
func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) {
data, err := r.Repo.AllocatedNodes(cluster)
if err != nil {
log.Warn("Error while fetching allocated nodes")
return nil, err
}
@@ -144,11 +159,13 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
numericId, err := strconv.ParseInt(id, 10, 64)
if err != nil {
log.Warn("Error while parsing job id")
return nil, err
}
job, err := r.Repo.FindById(numericId)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
@@ -163,11 +180,13 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
job, err := r.Query().Job(ctx, id)
if err != nil {
log.Warn("Error while querying job for metrics")
return nil, err
}
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
if err != nil {
log.Warn("Error while loading job data")
return nil, err
}
@@ -175,7 +194,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
for name, md := range data {
for scope, metric := range md {
if metric.Scope != schema.MetricScope(scope) {
panic("WTF?")
log.Panic("metric.Scope != schema.MetricScope(scope) : Should not happen!")
}
res = append(res, &model.JobMetricWithName{
@@ -204,11 +223,13 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
jobs, err := r.Repo.QueryJobs(ctx, filter, page, order)
if err != nil {
log.Warn("Error while querying jobs")
return nil, err
}
count, err := r.Repo.CountJobs(ctx, filter)
if err != nil {
log.Warn("Error while counting jobs")
return nil, err
}
@@ -217,13 +238,14 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
// JobsStatistics is the resolver for the jobsStatistics field.
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
return r.jobsStatistics(ctx, filter, groupBy)
return r.Repo.JobsStatistics(ctx, filter, groupBy)
}
// JobsCount is the resolver for the jobsCount field.
func (r *queryResolver) JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, weight *model.Weights, limit *int) ([]*model.Count, error) {
counts, err := r.Repo.CountGroupedJobs(ctx, groupBy, filter, weight, limit)
if err != nil {
log.Warn("Error while counting grouped jobs")
return nil, err
}
@@ -257,6 +279,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
log.Warn("Error while loading node data")
return nil, err
}

View File

@@ -6,217 +6,16 @@ package graph
import (
"context"
"database/sql"
"errors"
"fmt"
"math"
"time"
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
// GraphQL validation should make sure that no unkown values can be specified.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
}
const ShortJobDuration int = 5 * 60
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
// In case `groupBy` is nil (not used), the model.JobsStatistics used is at the key '' (empty string)
stats := map[string]*model.JobsStatistics{}
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
for _, cluster := range archive.Clusters {
for _, subcluster := range cluster.SubClusters {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", subcluster.SocketsPerNode, subcluster.CoresPerSocket)
var rawQuery sq.SelectBuilder
if groupBy == nil {
rawQuery = sq.Select(
"''",
"COUNT(job.id)",
"CAST(ROUND(SUM(job.duration) / 3600) as int)",
corehoursCol,
).From("job")
} else {
col := groupBy2column[*groupBy]
rawQuery = sq.Select(
col,
"COUNT(job.id)",
"CAST(ROUND(SUM(job.duration) / 3600) as int)",
corehoursCol,
).From("job").GroupBy(col)
}
rawQuery = rawQuery.
Where("job.cluster = ?", cluster.Name).
Where("job.subcluster = ?", subcluster.Name)
query, qerr := repository.SecurityCheck(ctx, rawQuery)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = repository.BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
return nil, err
}
for rows.Next() {
var id sql.NullString
var jobs, walltime, corehours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
return nil, err
}
if id.Valid {
if s, ok := stats[id.String]; ok {
s.TotalJobs += int(jobs.Int64)
s.TotalWalltime += int(walltime.Int64)
s.TotalCoreHours += int(corehours.Int64)
} else {
stats[id.String] = &model.JobsStatistics{
ID: id.String,
TotalJobs: int(jobs.Int64),
TotalWalltime: int(walltime.Int64),
TotalCoreHours: int(corehours.Int64),
}
}
}
}
}
}
if groupBy == nil {
query, qerr := repository.SecurityCheck(ctx, sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration))
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = repository.BuildWhereClause(f, query)
}
if err := query.RunWith(r.DB).QueryRow().Scan(&(stats[""].ShortJobs)); err != nil {
return nil, err
}
} else {
col := groupBy2column[*groupBy]
query, qerr := repository.SecurityCheck(ctx, sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration))
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = repository.BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
return nil, err
}
for rows.Next() {
var id sql.NullString
var shortJobs sql.NullInt64
if err := rows.Scan(&id, &shortJobs); err != nil {
return nil, err
}
if id.Valid {
stats[id.String].ShortJobs = int(shortJobs.Int64)
}
}
}
// Calculating the histogram data is expensive, so only do it if needed.
// An explicit resolver can not be used because we need to know the filters.
histogramsNeeded := false
fields := graphql.CollectFieldsCtx(ctx, nil)
for _, col := range fields {
if col.Name == "histDuration" || col.Name == "histNumNodes" {
histogramsNeeded = true
}
}
res := make([]*model.JobsStatistics, 0, len(stats))
for _, stat := range stats {
res = append(res, stat)
id, col := "", ""
if groupBy != nil {
id = stat.ID
col = groupBy2column[*groupBy]
}
if histogramsNeeded {
var err error
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as int) as value`, time.Now().Unix())
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter, id, col)
if err != nil {
return nil, err
}
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
if err != nil {
return nil, err
}
}
}
return res, nil
}
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
// to add a condition to the query of the kind "<col> = <id>".
func (r *queryResolver) jobsStatisticsHistogram(ctx context.Context, value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
query, qerr := repository.SecurityCheck(ctx, sq.Select(value, "COUNT(job.id) AS count").From("job"))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
query = repository.BuildWhereClause(f, query)
}
if len(id) != 0 && len(col) != 0 {
query = query.Where(col+" = ?", id)
}
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
if err != nil {
return nil, err
}
points := make([]*model.HistoPoint, 0)
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
return nil, err
}
points = append(points, &point)
}
return points, nil
}
const MAX_JOBS_FOR_ANALYSIS = 500
// Helper function for the rooflineHeatmap GraphQL query placed here so that schema.resolvers.go is not too full.
@@ -228,10 +27,11 @@ func (r *queryResolver) rooflineHeatmap(
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
log.Error("Error while querying jobs for roofline")
return nil, err
}
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
return nil, fmt.Errorf("too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
}
fcols, frows := float64(cols), float64(rows)
@@ -248,19 +48,20 @@ func (r *queryResolver) rooflineHeatmap(
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
if err != nil {
log.Error("Error while loading metrics for roofline")
return nil, err
}
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
if flops_ == nil && membw_ == nil {
return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %d", job.ID)
return nil, fmt.Errorf("GRAPH/STATS > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
}
flops, ok1 := flops_["node"]
membw, ok2 := membw_["node"]
if !ok1 || !ok2 {
// TODO/FIXME:
return nil, errors.New("todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
return nil, errors.New("GRAPH/STATS > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
}
for n := 0; n < len(flops.Series); n++ {
@@ -292,10 +93,11 @@ func (r *queryResolver) rooflineHeatmap(
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
log.Error("Error while querying jobs for footprint")
return nil, err
}
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
return nil, fmt.Errorf("too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
}
avgs := make([][]schema.Float, len(metrics))
@@ -310,6 +112,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
}
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Error("Error while loading averages for footprint")
return nil, err
}

View File

@@ -16,6 +16,7 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@@ -78,6 +79,7 @@ func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
var config CCMetricStoreConfig
if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
return err
}
@@ -124,11 +126,13 @@ func (ccms *CCMetricStore) doRequest(
buf := &bytes.Buffer{}
if err := json.NewEncoder(buf).Encode(body); err != nil {
log.Warn("Error while encoding request body")
return nil, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf)
if err != nil {
log.Warn("Error while building request body")
return nil, err
}
if ccms.jwt != "" {
@@ -137,6 +141,7 @@ func (ccms *CCMetricStore) doRequest(
res, err := ccms.client.Do(req)
if err != nil {
log.Error("Error while performing request")
return nil, err
}
@@ -146,6 +151,7 @@ func (ccms *CCMetricStore) doRequest(
var resBody ApiQueryResponse
if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil {
log.Warn("Error while decoding result body")
return nil, err
}
@@ -161,6 +167,7 @@ func (ccms *CCMetricStore) LoadData(
topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
if err != nil {
log.Warn("Error while building queries")
return nil, err
}
@@ -175,6 +182,7 @@ func (ccms *CCMetricStore) LoadData(
resBody, err := ccms.doRequest(ctx, &req)
if err != nil {
log.Error("Error while performing request")
return nil, err
}
@@ -202,6 +210,7 @@ func (ccms *CCMetricStore) LoadData(
for _, res := range row {
if res.Error != nil {
/* Build list for "partial errors", if any */
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
continue
}
@@ -245,7 +254,8 @@ func (ccms *CCMetricStore) LoadData(
}
if len(errors) != 0 {
return jobData, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", "))
/* Returns list for "partial errors" */
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
}
return jobData, nil
@@ -272,8 +282,8 @@ func (ccms *CCMetricStore) buildQueries(
remoteName := ccms.toRemoteName(metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil {
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
log.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
continue
}
@@ -483,7 +493,7 @@ func (ccms *CCMetricStore) buildQueries(
continue
}
return nil, nil, fmt.Errorf("TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
}
}
}
@@ -498,6 +508,7 @@ func (ccms *CCMetricStore) LoadStats(
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode})
if err != nil {
log.Warn("Error while building query")
return nil, err
}
@@ -512,6 +523,7 @@ func (ccms *CCMetricStore) LoadStats(
resBody, err := ccms.doRequest(ctx, &req)
if err != nil {
log.Error("Error while performing request")
return nil, err
}
@@ -521,7 +533,7 @@ func (ccms *CCMetricStore) LoadStats(
metric := ccms.toLocalName(query.Metric)
data := res[0]
if data.Error != nil {
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
}
metricdata, ok := stats[metric]
@@ -531,7 +543,7 @@ func (ccms *CCMetricStore) LoadStats(
}
if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() {
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
}
metricdata[query.Hostname] = schema.MetricStatistics{
@@ -577,6 +589,7 @@ func (ccms *CCMetricStore) LoadNodeData(
resBody, err := ccms.doRequest(ctx, &req)
if err != nil {
log.Error("Error while performing request")
return nil, err
}
@@ -593,11 +606,12 @@ func (ccms *CCMetricStore) LoadNodeData(
metric := ccms.toLocalName(query.Metric)
qdata := res[0]
if qdata.Error != nil {
/* Build list for "partial errors", if any */
errors = append(errors, fmt.Sprintf("fetching %s for node %s failed: %s", metric, query.Hostname, *qdata.Error))
}
if qdata.Avg.IsNaN() || qdata.Min.IsNaN() || qdata.Max.IsNaN() {
// return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
// return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
qdata.Avg, qdata.Min, qdata.Max = 0., 0., 0.
}
@@ -627,7 +641,8 @@ func (ccms *CCMetricStore) LoadNodeData(
}
if len(errors) != 0 {
return data, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", "))
/* Returns list of "partial errors" */
return data, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
}
return data, nil

View File

@@ -10,11 +10,11 @@ import (
"encoding/json"
"errors"
"fmt"
"log"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
@@ -37,6 +37,7 @@ type InfluxDBv2DataRepository struct {
func (idb *InfluxDBv2DataRepository) Init(rawConfig json.RawMessage) error {
var config InfluxDBv2DataRepositoryConfig
if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
return err
}
@@ -71,7 +72,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil {
// TODO
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
}
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
}
@@ -84,7 +85,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
switch scope {
case "node":
// Get Finest Granularity, Groupy By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows
// log.Println("Note: Scope 'node' requested. ")
// log.Info("Scope 'node' requested. ")
query = fmt.Sprintf(`
from(bucket: "%s")
|> range(start: %s, stop: %s)
@@ -97,10 +98,10 @@ func (idb *InfluxDBv2DataRepository) LoadData(
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
measurementsCond, hostsCond)
case "socket":
log.Println("Note: Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
log.Info("Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
continue
case "core":
log.Println("Note: Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
log.Info(" Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
continue
// Get Finest Granularity only, Set NULL to 0.0
// query = fmt.Sprintf(`
@@ -114,13 +115,14 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
// measurementsCond, hostsCond)
default:
log.Println("Note: Unknown Scope requested: Will return 'node' scope. ")
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
continue
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node'")
// return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support other scopes than 'node'")
}
rows, err := idb.queryClient.Query(ctx, query)
if err != nil {
log.Error("Error while performing query")
return nil, err
}
@@ -192,6 +194,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// hostSeries.Data = append(hostSeries.Data, schema.Float(val))
// }
default:
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
continue
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'")
}
@@ -202,21 +205,22 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// Get Stats
stats, err := idb.LoadStats(job, metrics, ctx)
if err != nil {
log.Warn("Error while loading statistics")
return nil, err
}
for _, scope := range scopes {
if scope == "node" { // No 'socket/core' support yet
for metric, nodes := range stats {
// log.Println(fmt.Sprintf("<< Add Stats for : Field %s >>", metric))
// log.Debugf("<< Add Stats for : Field %s >>", metric)
for node, stats := range nodes {
// log.Println(fmt.Sprintf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg ))
// log.Debugf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg )
for index, _ := range jobData[metric][scope].Series {
// log.Println(fmt.Sprintf("<< Try to add Stats to Series in Position %d >>", index))
// log.Debugf("<< Try to add Stats to Series in Position %d >>", index)
if jobData[metric][scope].Series[index].Hostname == node {
// log.Println(fmt.Sprintf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname))
// log.Debugf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname)
jobData[metric][scope].Series[index].Statistics = &schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
// log.Println(fmt.Sprintf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg))
// log.Debugf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg)
}
}
}
@@ -228,9 +232,9 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// for _, scope := range scopes {
// for _, met := range metrics {
// for _, series := range jobData[met][scope].Series {
// log.Println(fmt.Sprintf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
// log.Debugf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
// len(series.Data), met, series.Hostname, scope,
// series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg))
// series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg)
// }
// }
// }
@@ -249,7 +253,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil {
// TODO
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
}
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
}
@@ -258,7 +262,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
// lenMet := len(metrics)
for _, metric := range metrics {
// log.Println(fmt.Sprintf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet))
// log.Debugf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)
query := fmt.Sprintf(`
data = from(bucket: "%s")
@@ -275,6 +279,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
rows, err := idb.queryClient.Query(ctx, query)
if err != nil {
log.Error("Error while performing query")
return nil, err
}
@@ -285,17 +290,17 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
avg, avgok := row.ValueByKey("avg").(float64)
if !avgok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg))
// log.Debugf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)
avg = 0.0
}
min, minok := row.ValueByKey("min").(float64)
if !minok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min))
// log.Debugf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)
min = 0.0
}
max, maxok := row.ValueByKey("max").(float64)
if !maxok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max))
// log.Debugf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)
max = 0.0
}
@@ -319,7 +324,7 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
// TODO : Implement to be used in Analysis- und System/Node-View
log.Println(fmt.Sprintf("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes))
log.Infof("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes)
return nil, errors.New("unimplemented for InfluxDBv2DataRepository")
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
}

View File

@@ -46,6 +46,7 @@ func Init(disableArchive bool) error {
Kind string `json:"kind"`
}
if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil {
log.Warn("Error while unmarshaling raw json MetricDataRepository")
return err
}
@@ -60,10 +61,11 @@ func Init(disableArchive bool) error {
case "test":
mdr = &TestMetricDataRepository{}
default:
return fmt.Errorf("unkown metric data repository '%s' for cluster '%s'", kind.Kind, cluster.Name)
return fmt.Errorf("METRICDATA/METRICDATA > Unknown MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
}
if err := mdr.Init(cluster.MetricDataRepository); err != nil {
log.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
return err
}
metricDataRepos[cluster.Name] = mdr
@@ -90,7 +92,7 @@ func LoadData(job *schema.Job,
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster), 0, 0
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}
if scopes == nil {
@@ -107,8 +109,9 @@ func LoadData(job *schema.Job,
jd, err = repo.LoadData(job, metrics, scopes, ctx)
if err != nil {
if len(jd) != 0 {
log.Errorf("partial error: %s", err.Error())
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading job data from metric repository")
return err, 0, 0
}
}
@@ -116,6 +119,7 @@ func LoadData(job *schema.Job,
} else {
jd, err = archive.GetHandle().LoadJobData(job)
if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0
}
@@ -163,6 +167,7 @@ func LoadData(job *schema.Job,
})
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
@@ -182,11 +187,12 @@ func LoadAverages(
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
}
stats, err := repo.LoadStats(job, metrics, ctx)
if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err
}
@@ -217,7 +223,7 @@ func LoadNodeData(
repo, ok := metricDataRepos[cluster]
if !ok {
return nil, fmt.Errorf("no metric data repository configured for '%s'", cluster)
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
}
if metrics == nil {
@@ -229,14 +235,15 @@ func LoadNodeData(
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Errorf("partial error: %s", err.Error())
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading node data from metric repository")
return nil, err
}
}
if data == nil {
return nil, fmt.Errorf("the metric data repository for '%s' does not support this query", cluster)
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
}
return data, nil
@@ -303,6 +310,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil {
log.Error("Error wile loading job data for archiving")
return nil, err
}

View File

@@ -5,46 +5,46 @@
package metricdata
import (
"os"
"errors"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"text/template"
"bytes"
"net/http"
"time"
"math"
"sort"
"net/http"
"os"
"regexp"
"sort"
"strings"
"sync"
"text/template"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
promapi "github.com/prometheus/client_golang/api"
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
promcfg "github.com/prometheus/common/config"
promm "github.com/prometheus/common/model"
)
type PrometheusDataRepositoryConfig struct {
Url string `json:"url"`
Username string `json:"username,omitempty"`
Suffix string `json:"suffix,omitempty"`
Templates map[string]string `json:"query-templates"`
Url string `json:"url"`
Username string `json:"username,omitempty"`
Suffix string `json:"suffix,omitempty"`
Templates map[string]string `json:"query-templates"`
}
type PrometheusDataRepository struct {
client promapi.Client
queryClient promv1.API
suffix string
templates map[string]*template.Template
client promapi.Client
queryClient promv1.API
suffix string
templates map[string]*template.Template
}
type PromQLArgs struct {
Nodes string
Nodes string
}
type Trie map[rune]Trie
@@ -60,10 +60,9 @@ func contains(s []schema.MetricScope, str schema.MetricScope) bool {
return false
}
func MinMaxMean(data []schema.Float) (float64, float64, float64) {
if len(data) == 0 {
return 0.0, 0.0, 0.0
return 0.0, 0.0, 0.0
}
min := math.MaxFloat64
max := -math.MaxFloat64
@@ -75,85 +74,92 @@ func MinMaxMean(data []schema.Float) (float64, float64, float64) {
}
sum += float64(val)
n += 1
if float64(val) > max {max = float64(val)}
if float64(val) < min {min = float64(val)}
if float64(val) > max {
max = float64(val)
}
if float64(val) < min {
min = float64(val)
}
}
return min, max, sum / n
}
// Rewritten from
// https://github.com/ermanh/trieregex/blob/master/trieregex/trieregex.py
func nodeRegex(nodes []string) string {
root := Trie{}
// add runes of each compute node to trie
for _, node := range nodes {
_trie := root
for _, c := range node {
if _, ok := _trie[c]; !ok {_trie[c] = Trie{}}
_trie = _trie[c]
}
_trie['*'] = Trie{}
}
// recursively build regex from rune trie
var trieRegex func(trie Trie, reset bool) string
trieRegex = func(trie Trie, reset bool) string {
if reset == true {
trie = root
}
if len(trie) == 0 {
return ""
}
if len(trie) == 1 {
for key, _trie := range trie {
if key == '*' { return "" }
return regexp.QuoteMeta(string(key)) + trieRegex(_trie, false)
}
} else {
sequences := []string{}
for key, _trie := range trie {
if key != '*' {
sequences = append(sequences, regexp.QuoteMeta(string(key)) + trieRegex(_trie, false))
}
}
sort.Slice(sequences, func(i, j int) bool {
return (-len(sequences[i]) < -len(sequences[j])) || (sequences[i] < sequences[j])
})
var result string
// single edge from this tree node
if len(sequences) == 1 {
result = sequences[0]
if len(result) > 1 {
result = "(?:" + result + ")"
}
// multiple edges, each length 1
} else if s := strings.Join(sequences, ""); len(s) == len(sequences) {
// char or numeric range
if len(s)-1 == int(s[len(s)-1]) - int(s[0]) {
result = fmt.Sprintf("[%c-%c]", s[0], s[len(s)-1])
// char or numeric set
} else {
result = "[" + s + "]"
}
// multiple edges of different lengths
} else {
result = "(?:" + strings.Join(sequences, "|") + ")"
}
if _, ok := trie['*']; ok { result += "?"}
return result
}
return ""
}
return trieRegex(root, true)
root := Trie{}
// add runes of each compute node to trie
for _, node := range nodes {
_trie := root
for _, c := range node {
if _, ok := _trie[c]; !ok {
_trie[c] = Trie{}
}
_trie = _trie[c]
}
_trie['*'] = Trie{}
}
// recursively build regex from rune trie
var trieRegex func(trie Trie, reset bool) string
trieRegex = func(trie Trie, reset bool) string {
if reset == true {
trie = root
}
if len(trie) == 0 {
return ""
}
if len(trie) == 1 {
for key, _trie := range trie {
if key == '*' {
return ""
}
return regexp.QuoteMeta(string(key)) + trieRegex(_trie, false)
}
} else {
sequences := []string{}
for key, _trie := range trie {
if key != '*' {
sequences = append(sequences, regexp.QuoteMeta(string(key))+trieRegex(_trie, false))
}
}
sort.Slice(sequences, func(i, j int) bool {
return (-len(sequences[i]) < -len(sequences[j])) || (sequences[i] < sequences[j])
})
var result string
// single edge from this tree node
if len(sequences) == 1 {
result = sequences[0]
if len(result) > 1 {
result = "(?:" + result + ")"
}
// multiple edges, each length 1
} else if s := strings.Join(sequences, ""); len(s) == len(sequences) {
// char or numeric range
if len(s)-1 == int(s[len(s)-1])-int(s[0]) {
result = fmt.Sprintf("[%c-%c]", s[0], s[len(s)-1])
// char or numeric set
} else {
result = "[" + s + "]"
}
// multiple edges of different lengths
} else {
result = "(?:" + strings.Join(sequences, "|") + ")"
}
if _, ok := trie['*']; ok {
result += "?"
}
return result
}
return ""
}
return trieRegex(root, true)
}
func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
var config PrometheusDataRepositoryConfig
// parse config
if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
return err
}
// support basic authentication
@@ -163,15 +169,16 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
} else {
if config.Username != "" {
return errors.New("Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
}
}
// init client
client, err := promapi.NewClient(promapi.Config{
Address: config.Url,
Address: config.Url,
RoundTripper: rt,
})
if err != nil {
log.Error("Error while initializing new prometheus client")
return err
}
// init query client
@@ -186,15 +193,12 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
if err == nil {
log.Debugf("Added PromQL template for %s: %s", metric, templ)
} else {
log.Errorf("Failed to parse PromQL template %s for metric %s", templ, metric)
log.Warnf("Failed to parse PromQL template %s for metric %s", templ, metric)
}
}
return nil
}
// TODO: respect scope argument
func (pdb *PrometheusDataRepository) FormatQuery(
metric string,
@@ -213,53 +217,47 @@ func (pdb *PrometheusDataRepository) FormatQuery(
if templ, ok := pdb.templates[metric]; ok {
err := templ.Execute(buf, args)
if err != nil {
return "", errors.New(fmt.Sprintf("Error compiling template %v", templ))
return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > Error compiling template %v", templ))
} else {
query := buf.String()
log.Debugf(fmt.Sprintf("PromQL: %s", query))
log.Debugf("PromQL: %s", query)
return query, nil
}
} else {
return "", errors.New(fmt.Sprintf("No PromQL for metric %s configured.", metric))
return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > No PromQL for metric %s configured.", metric))
}
}
// Convert PromAPI row to CC schema.Series
func (pdb *PrometheusDataRepository) RowToSeries(
from time.Time,
step int64,
steps int64,
row *promm.SampleStream) (schema.Series) {
ts := from.Unix()
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
// init array of expected length with NaN
values := make([]schema.Float, steps + 1)
for i, _ := range values {
values[i] = schema.NaN
}
// copy recorded values from prom sample pair
for _, v := range row.Values {
idx := (v.Timestamp.Unix() - ts) / step
values[idx] = schema.Float(v.Value)
}
min, max, mean := MinMaxMean(values)
// output struct
return schema.Series{
Hostname: hostname,
Data: values,
Statistics: &schema.MetricStatistics{
Avg: mean,
Min: min,
Max: max,
},
}
from time.Time,
step int64,
steps int64,
row *promm.SampleStream) schema.Series {
ts := from.Unix()
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
// init array of expected length with NaN
values := make([]schema.Float, steps+1)
for i, _ := range values {
values[i] = schema.NaN
}
// copy recorded values from prom sample pair
for _, v := range row.Values {
idx := (v.Timestamp.Unix() - ts) / step
values[idx] = schema.Float(v.Value)
}
min, max, mean := MinMaxMean(values)
// output struct
return schema.Series{
Hostname: hostname,
Data: values,
Statistics: &schema.MetricStatistics{
Avg: mean,
Min: min,
Max: max,
},
}
}
func (pdb *PrometheusDataRepository) LoadData(
job *schema.Job,
@@ -268,7 +266,7 @@ func (pdb *PrometheusDataRepository) LoadData(
ctx context.Context) (schema.JobData, error) {
// TODO respect requested scope
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode){
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
scopes = append(scopes, schema.MetricScopeNode)
}
@@ -283,36 +281,38 @@ func (pdb *PrometheusDataRepository) LoadData(
for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func(){log.Infof(fmt.Sprintf("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope))})
logOnce.Do(func() {
log.Infof("Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue
}
for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(job.Cluster, metric)
if metricConfig == nil {
log.Errorf(fmt.Sprintf("Error in LoadData: Metric %s for cluster %s not configured",
metric, job.Cluster))
return nil, errors.New("Prometheus querry error")
log.Warnf("Error in LoadData: Metric %s for cluster %s not configured", metric, job.Cluster)
return nil, errors.New("Prometheus config error")
}
query, err := pdb.FormatQuery(metric, scope, nodes, job.Cluster)
if err != nil {
log.Warn("Error while formatting prometheus query")
return nil, err
}
// ranged query over all job nodes
r := promv1.Range{
Start: from,
End: to,
Step: time.Duration(metricConfig.Timestep * 1e9),
}
Start: from,
End: to,
Step: time.Duration(metricConfig.Timestep * 1e9),
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf(fmt.Sprintf("Prometheus query error in LoadData: %v\nQuery: %s", err, query))
return nil, errors.New("Prometheus querry error")
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
return nil, errors.New("Prometheus query error")
}
if len(warnings) > 0 {
log.Warnf(fmt.Sprintf("Warnings: %v\n", warnings))
log.Warnf("Warnings: %v\n", warnings)
}
// init data structures
@@ -338,16 +338,13 @@ func (pdb *PrometheusDataRepository) LoadData(
}
// sort by hostname to get uniform coloring
sort.Slice(jobMetric.Series, func(i, j int) bool {
return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname)
return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname)
})
}
}
return jobData, nil
}
// TODO change implementation to precomputed/cached stats
func (pdb *PrometheusDataRepository) LoadStats(
job *schema.Job,
@@ -359,6 +356,7 @@ func (pdb *PrometheusDataRepository) LoadStats(
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
if err != nil {
log.Warn("Error while loading job for stats")
return nil, err
}
for metric, metricData := range data {
@@ -371,9 +369,6 @@ func (pdb *PrometheusDataRepository) LoadStats(
return stats, nil
}
func (pdb *PrometheusDataRepository) LoadNodeData(
cluster string,
metrics, nodes []string,
@@ -390,35 +385,37 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
}
for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func(){log.Infof(fmt.Sprintf("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope))})
logOnce.Do(func() {
log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue
}
for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(cluster, metric)
if metricConfig == nil {
log.Errorf(fmt.Sprintf("Error in LoadNodeData: Metric %s for cluster %s not configured",
metric, cluster))
return nil, errors.New("Prometheus querry error")
log.Warnf("Error in LoadNodeData: Metric %s for cluster %s not configured", metric, cluster)
return nil, errors.New("Prometheus config error")
}
query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
if err != nil {
log.Warn("Error while formatting prometheus query")
return nil, err
}
// ranged query over all nodes
r := promv1.Range{
Start: from,
End: to,
Step: time.Duration(metricConfig.Timestep * 1e9),
}
Start: from,
End: to,
Step: time.Duration(metricConfig.Timestep * 1e9),
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf(fmt.Sprintf("Prometheus query error in LoadNodeData: %v\n", err))
return nil, errors.New("Prometheus querry error")
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
return nil, errors.New("Prometheus query error")
}
if len(warnings) > 0 {
log.Warnf(fmt.Sprintf("Warnings: %v\n", warnings))
log.Warnf("Warnings: %v\n", warnings)
}
step := int64(metricConfig.Timestep)
@@ -437,13 +434,13 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
Unit: metricConfig.Unit,
Scope: scope,
Timestep: metricConfig.Timestep,
Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)},
},
Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)},
},
)
}
}
}
t1 := time.Since(t0)
log.Debugf(fmt.Sprintf("LoadNodeData of %v nodes took %s", len(data), t1))
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
return data, nil
}

View File

@@ -5,12 +5,15 @@
package repository
import (
"database/sql"
"fmt"
"log"
"sync"
"time"
"github.com/jmoiron/sqlx"
"github.com/mattn/go-sqlite3"
"github.com/qustavo/sqlhooks/v2"
)
var (
@@ -19,7 +22,8 @@ var (
)
type DBConnection struct {
DB *sqlx.DB
DB *sqlx.DB
Driver string
}
func Connect(driver string, db string) {
@@ -28,7 +32,9 @@ func Connect(driver string, db string) {
dbConnOnce.Do(func() {
if driver == "sqlite3" {
dbHandle, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", db))
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
dbHandle, err = sqlx.Open("sqlite3WithHooks", fmt.Sprintf("%s?_foreign_keys=on", db))
// dbHandle, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", db))
if err != nil {
log.Fatal(err)
}
@@ -39,7 +45,7 @@ func Connect(driver string, db string) {
} else if driver == "mysql" {
dbHandle, err = sqlx.Open("mysql", fmt.Sprintf("%s?multiStatements=true", db))
if err != nil {
log.Fatal(err)
log.Fatalf("sqlx.Open() error: %v", err)
}
dbHandle.SetConnMaxLifetime(time.Minute * 3)
@@ -49,7 +55,8 @@ func Connect(driver string, db string) {
log.Fatalf("unsupported database driver: %s", driver)
}
dbConnInstance = &DBConnection{DB: dbHandle}
dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
checkDBVersion(driver, dbHandle.DB)
})
}

View File

@@ -0,0 +1,28 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
// Hooks satisfies the sqlhook.Hooks interface
type Hooks struct{}
// Before hook will print the query with it's args and return the context with the timestamp
func (h *Hooks) Before(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
log.Infof("SQL query %s %q", query, args)
return context.WithValue(ctx, "begin", time.Now()), nil
}
// After hook will get the timestamp registered on the Before hook and print the elapsed time
func (h *Hooks) After(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
begin := ctx.Value("begin").(time.Time)
log.Infof("Took: %s\n", time.Since(begin))
return ctx, nil
}

View File

@@ -19,67 +19,6 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// `AUTO_INCREMENT` is in a comment because of this hack:
// https://stackoverflow.com/a/41028314 (sqlite creates unique ids automatically)
const JobsDBSchema string = `
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tag;
CREATE TABLE job (
id INTEGER PRIMARY KEY /*!40101 AUTO_INCREMENT */,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
` + "`partition`" + ` VARCHAR(255) NOT NULL, -- partition is a keyword in mysql -.-
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
CREATE TABLE tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
CREATE TABLE jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
`
// Indexes are created after the job-archive is traversed for faster inserts.
const JobsDbIndexes string = `
CREATE INDEX job_by_user ON job (user);
CREATE INDEX job_by_starttime ON job (start_time);
CREATE INDEX job_by_job_id ON job (job_id);
CREATE INDEX job_by_state ON job (job_state);
`
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
@@ -95,40 +34,44 @@ func HandleImportFlag(flag string) error {
for _, pair := range strings.Split(flag, ",") {
files := strings.Split(pair, ":")
if len(files) != 2 {
return fmt.Errorf("invalid import flag format")
return fmt.Errorf("REPOSITORY/INIT > invalid import flag format")
}
raw, err := os.ReadFile(files[0])
if err != nil {
log.Warn("Error while reading metadata file for import")
return err
}
if config.Keys.Validate {
if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil {
return fmt.Errorf("validate job meta: %v", err)
return fmt.Errorf("REPOSITORY/INIT > validate job meta: %v", err)
}
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
if err := dec.Decode(&jobMeta); err != nil {
log.Warn("Error while decoding raw json metadata for import")
return err
}
raw, err = os.ReadFile(files[1])
if err != nil {
log.Warn("Error while reading jobdata file for import")
return err
}
if config.Keys.Validate {
if err := schema.Validate(schema.Data, bytes.NewReader(raw)); err != nil {
return fmt.Errorf("validate job data: %v", err)
return fmt.Errorf("REPOSITORY/INIT > validate job data: %v", err)
}
}
dec = json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobData := schema.JobData{}
if err := dec.Decode(&jobData); err != nil {
log.Warn("Error while decoding raw json jobdata for import")
return err
}
@@ -136,10 +79,11 @@ func HandleImportFlag(flag string) error {
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
if job, err := GetJobRepository().Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
if err != nil {
log.Warn("Error while finding job in jobRepository")
return err
}
return fmt.Errorf("a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
}
job := schema.Job{
@@ -155,38 +99,45 @@ func HandleImportFlag(flag string) error {
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Warn("Error while marshaling job resources")
return err
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
log.Warn("Error while marshaling job metadata")
return err
}
if err := SanityChecks(&job.BaseJob); err != nil {
log.Warn("BaseJob SanityChecks failed")
return err
}
if err := archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
log.Error("Error while importing job")
return err
}
res, err := GetConnection().DB.NamedExec(NamedJobInsert, job)
if err != nil {
log.Warn("Error while NamedJobInsert")
return err
}
id, err := res.LastInsertId()
if err != nil {
log.Warn("Error while getting last insert ID")
return err
}
for _, tag := range job.Tags {
if _, err := GetJobRepository().AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
log.Error("Error while adding or creating tag")
return err
}
}
log.Infof("Successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
log.Infof("successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
}
return nil
}
@@ -198,21 +149,17 @@ func InitDB() error {
starttime := time.Now()
log.Print("Building job table...")
// Basic database structure:
_, err := db.DB.Exec(JobsDBSchema)
if err != nil {
return err
}
// Inserts are bundled into transactions because in sqlite,
// that speeds up inserts A LOT.
tx, err := db.DB.Beginx()
if err != nil {
log.Warn("Error while bundling transactions")
return err
}
stmt, err := tx.PrepareNamed(NamedJobInsert)
if err != nil {
log.Warn("Error while preparing namedJobInsert")
return err
}
tags := make(map[string]int64)
@@ -232,12 +179,14 @@ func InitDB() error {
if i%10 == 0 {
if tx != nil {
if err := tx.Commit(); err != nil {
log.Warn("Error while committing transactions for jobMeta")
return err
}
}
tx, err = db.DB.Beginx()
if err != nil {
log.Warn("Error while bundling transactions for jobMeta")
return err
}
@@ -260,34 +209,34 @@ func InitDB() error {
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
if err := SanityChecks(&job.BaseJob); err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
res, err := stmt.Exec(job)
if err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
id, err := res.LastInsertId()
if err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
@@ -298,16 +247,19 @@ func InitDB() error {
if !ok {
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
if err != nil {
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
return err
}
tagId, err = res.LastInsertId()
if err != nil {
log.Warn("Error while getting last insert ID")
return err
}
tags[tagstr] = tagId
}
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", id, tagId)
return err
}
}
@@ -318,16 +270,11 @@ func InitDB() error {
}
if errorOccured > 0 {
log.Errorf("Error in import of %d jobs!", errorOccured)
log.Warnf("Error in import of %d jobs!", errorOccured)
}
if err := tx.Commit(); err != nil {
return err
}
// Create indexes after inserts so that they do not
// need to be continually updated.
if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
log.Warn("Error while committing SQL transactions")
return err
}
@@ -338,13 +285,14 @@ func InitDB() error {
// This function also sets the subcluster if necessary!
func SanityChecks(job *schema.BaseJob) error {
if c := archive.GetCluster(job.Cluster); c == nil {
return fmt.Errorf("no such cluster: %#v", job.Cluster)
return fmt.Errorf("no such cluster: %v", job.Cluster)
}
if err := archive.AssignSubCluster(job); err != nil {
log.Warn("Error while assigning subcluster to job")
return err
}
if !job.State.Valid() {
return fmt.Errorf("not a valid job state: %#v", job.State)
return fmt.Errorf("not a valid job state: %v", job.State)
}
if len(job.Resources) == 0 || len(job.User) == 0 {
return fmt.Errorf("'resources' and 'user' should not be empty")

View File

@@ -14,9 +14,11 @@ import (
"sync"
"time"
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -30,7 +32,8 @@ var (
)
type JobRepository struct {
DB *sqlx.DB
DB *sqlx.DB
driver string
stmtCache *sq.StmtCache
cache *lrucache.Cache
@@ -44,9 +47,11 @@ func GetJobRepository() *JobRepository {
db := GetConnection()
jobRepoInstance = &JobRepository{
DB: db.DB,
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
DB: db.DB,
driver: db.Driver,
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
archiveChannel: make(chan *schema.Job, 128),
}
// start archiving worker
@@ -67,14 +72,20 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources /*&job.MetaData*/); err != nil {
&job.Duration, &job.Walltime, &job.RawResources /*&job.RawMetaData*/); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
log.Warn("Error while unmarhsaling raw resources json")
return nil, err
}
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
// return nil, err
// }
job.StartTime = time.Unix(job.StartTimeUnix, 0)
if job.Duration == 0 && job.State == schema.JobStateRunning {
job.Duration = int32(time.Since(job.StartTime).Seconds())
@@ -84,11 +95,14 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
return job, nil
}
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
func (r *JobRepository) FetchJobName(job *schema.Job) (*string, error) {
start := time.Now()
cachekey := fmt.Sprintf("metadata:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.MetaData = cached.(map[string]string)
return job.MetaData, nil
if jobName := job.MetaData["jobName"]; jobName != "" {
return &jobName, nil
}
}
if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
@@ -105,6 +119,40 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
}
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
log.Infof("Timer FetchJobName %s", time.Since(start))
if jobName := job.MetaData["jobName"]; jobName != "" {
return &jobName, nil
} else {
return new(string), nil
}
}
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
start := time.Now()
cachekey := fmt.Sprintf("metadata:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.MetaData = cached.(map[string]string)
return job.MetaData, nil
}
if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawMetaData); err != nil {
log.Warn("Error while scanning for job metadata")
return nil, err
}
if len(job.RawMetaData) == 0 {
return nil, nil
}
if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
log.Warn("Error while unmarshaling raw metadata json")
return nil, err
}
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
log.Infof("Timer FetchMetadata %s", time.Since(start))
return job.MetaData, nil
}
@@ -113,6 +161,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
r.cache.Del(cachekey)
if job.MetaData == nil {
if _, err = r.FetchMetadata(job); err != nil {
log.Warnf("Error while fetching metadata for job, DB ID '%v'", job.ID)
return err
}
}
@@ -129,10 +178,12 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
}
if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
log.Warnf("Error while marshaling metadata for job, DB ID '%v'", job.ID)
return err
}
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
return err
}
@@ -150,6 +201,7 @@ func (r *JobRepository) Find(
cluster *string,
startTime *int64) (*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
@@ -160,6 +212,7 @@ func (r *JobRepository) Find(
q = q.Where("job.start_time = ?", *startTime)
}
log.Infof("Timer Find %s", time.Since(start))
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
@@ -173,6 +226,7 @@ func (r *JobRepository) FindAll(
cluster *string,
startTime *int64) ([]*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
@@ -185,6 +239,7 @@ func (r *JobRepository) FindAll(
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
@@ -192,10 +247,12 @@ func (r *JobRepository) FindAll(
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}
log.Infof("Timer FindAll %s", time.Since(start))
return jobs, nil
}
@@ -214,12 +271,12 @@ func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return -1, fmt.Errorf("encoding resources field failed: %w", err)
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return -1, fmt.Errorf("encoding metaData field failed: %w", err)
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
}
res, err := r.DB.NamedExec(`INSERT INTO job (
@@ -259,7 +316,7 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
err := r.DB.Get(&cnt, qs) //ignore error as it will also occur in delete statement
_, err = r.DB.Exec(`DELETE FROM job WHERE job.start_time < ?`, startTime)
if err != nil {
log.Warnf(" DeleteJobsBefore(%d): error %v", startTime, err)
log.Errorf(" DeleteJobsBefore(%d): error %#v", startTime, err)
} else {
log.Infof("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
}
@@ -269,7 +326,7 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
func (r *JobRepository) DeleteJobById(id int64) error {
_, err := r.DB.Exec(`DELETE FROM job WHERE job.id = ?`, id)
if err != nil {
log.Warnf("DeleteJobById(%d): error %v", id, err)
log.Errorf("DeleteJobById(%d): error %#v", id, err)
} else {
log.Infof("DeleteJobById(%d): Success", id)
}
@@ -278,6 +335,7 @@ func (r *JobRepository) DeleteJobById(id int64) error {
// TODO: Use node hours instead: SELECT job.user, sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN CAST(strftime('%s', 'now') AS INTEGER) - job.start_time ELSE job.duration END)) as x FROM job GROUP BY user ORDER BY x DESC;
func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggregate, filters []*model.JobFilter, weight *model.Weights, limit *int) (map[string]int, error) {
start := time.Now()
if !aggreg.IsValid() {
return nil, errors.New("invalid aggregate")
}
@@ -292,10 +350,12 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
now := time.Now().Unix()
count = fmt.Sprintf(`sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) as count`, now)
runner = r.DB
default:
log.Infof("CountGroupedJobs() Weight %v unknown.", *weight)
}
}
q, qerr := SecurityCheck(ctx, sq.Select("job."+string(aggreg), count).From("job").GroupBy("job." + string(aggreg)).OrderBy("count DESC"))
q, qerr := SecurityCheck(ctx, sq.Select("job."+string(aggreg), count).From("job").GroupBy("job."+string(aggreg)).OrderBy("count DESC"))
if qerr != nil {
return nil, qerr
@@ -311,6 +371,7 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
counts := map[string]int{}
rows, err := q.RunWith(runner).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
@@ -318,12 +379,14 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
var group string
var count int
if err := rows.Scan(&group, &count); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
counts[group] = count
}
log.Infof("Timer CountGroupedJobs %s", time.Since(start))
return counts, nil
}
@@ -360,20 +423,23 @@ func (r *JobRepository) MarkArchived(
stmt = stmt.Set("net_bw_avg", stats.Avg)
case "file_bw":
stmt = stmt.Set("file_bw_avg", stats.Avg)
default:
log.Infof("MarkArchived() Metric '%v' unknown", metric)
}
}
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
log.Warn("Error while marking job as archived")
return err
}
return nil
}
// Archiving worker thread
func (r *JobRepository) archivingWorker(){
func (r *JobRepository) archivingWorker() {
for {
select {
case job, ok := <- r.archiveChannel:
case job, ok := <-r.archiveChannel:
if !ok {
break
}
@@ -407,54 +473,216 @@ func (r *JobRepository) archivingWorker(){
}
// Trigger async archiving
func (r *JobRepository) TriggerArchiving(job *schema.Job){
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
r.archivePending.Add(1)
r.archiveChannel <- job
}
// Wait for background thread to finish pending archiving operations
func (r *JobRepository) WaitForArchiving(){
func (r *JobRepository) WaitForArchiving() {
// close channel and wait for worker to process remaining jobs
r.archivePending.Wait()
}
var ErrNotFound = errors.New("no such job or user")
var ErrNotFound = errors.New("no such jobname, project or user")
var ErrForbidden = errors.New("not authorized")
// FindJobOrUser returns a job database ID or a username if a job or user machtes the search term.
// As 0 is a valid job id, check if username is "" instead in order to check what machted.
// FindJobnameOrUserOrProject returns a jobName or a username or a projectId if a jobName or user or project matches the search term.
// If query is found to be an integer (= conversion to INT datatype succeeds), skip back to parent call
// If nothing matches the search, `ErrNotFound` is returned.
func (r *JobRepository) FindJobOrUser(ctx context.Context, searchterm string) (job int64, username string, err error) {
func (r *JobRepository) FindJobnameOrUserOrProject(ctx context.Context, searchterm string) (metasnip string, username string, project string, err error) {
user := auth.GetUser(ctx)
if id, err := strconv.Atoi(searchterm); err == nil {
qb := sq.Select("job.id").From("job").Where("job.job_id = ?", id)
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
return "", "", "", nil
} else { // has to have letters
if user != nil && user.HasNotRoles([]string{auth.RoleAdmin, auth.RoleSupport}) {
qb = qb.Where("job.user = ?", user.Username)
err := sq.Select("job.user").Distinct().From("job").
Where("job.user = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&username)
if err != nil && err != sql.ErrNoRows {
return "", "", "", err
} else if err == nil {
return "", username, "", nil
}
if username == "" { // Try with Name2Username query
errtwo := sq.Select("user.username").Distinct().From("user").
Where("user.name LIKE ?", fmt.Sprint("%"+searchterm+"%")).
RunWith(r.stmtCache).QueryRow().Scan(&username)
if errtwo != nil && errtwo != sql.ErrNoRows {
return "", "", "", errtwo
} else if errtwo == nil {
return "", username, "", nil
}
}
}
err := qb.RunWith(r.stmtCache).QueryRow().Scan(&job)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
err := sq.Select("job.project").Distinct().From("job").
Where("job.project = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&project)
if err != nil && err != sql.ErrNoRows {
return "", "", "", err
} else if err == nil {
return "", "", project, nil
}
}
// All Authorizations: If unlabeled query not username or projectId, try for jobname: Match Metadata, on hit, parent method redirects to jobName GQL query
err := sq.Select("job.cluster").Distinct().From("job").
Where("job.meta_data LIKE ?", "%"+searchterm+"%").
RunWith(r.stmtCache).QueryRow().Scan(&metasnip)
if err != nil && err != sql.ErrNoRows {
return 0, "", err
return "", "", "", err
} else if err == nil {
return job, "", nil
return metasnip[0:1], "", "", nil
}
}
return "", "", "", ErrNotFound
}
}
func (r *JobRepository) FindUser(ctx context.Context, searchterm string) (username string, err error) {
user := auth.GetUser(ctx)
if user == nil || user.HasAnyRole([]string{auth.RoleAdmin, auth.RoleSupport}) {
err := sq.Select("job.user").Distinct().From("job").
Where("job.user = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&username)
if err != nil && err != sql.ErrNoRows {
return 0, "", err
return "", err
} else if err == nil {
return 0, username, nil
return username, nil
}
}
return "", ErrNotFound
return 0, "", ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query Username -> %s: Forbidden", user.Name, searchterm)
return "", ErrForbidden
}
}
func (r *JobRepository) FindUserByName(ctx context.Context, searchterm string) (username string, err error) {
user := auth.GetUser(ctx)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
err := sq.Select("user.username").Distinct().From("user").
Where("user.name = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&username)
if err != nil && err != sql.ErrNoRows {
return "", err
} else if err == nil {
return username, nil
}
return "", ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query Name -> %s: Forbidden", user.Name, searchterm)
return "", ErrForbidden
}
}
func (r *JobRepository) FindUsers(ctx context.Context, searchterm string) (usernames []string, err error) {
user := auth.GetUser(ctx)
emptyResult := make([]string, 0)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
rows, err := sq.Select("job.user").Distinct().From("job").
Where("job.user LIKE ?", fmt.Sprint("%", searchterm, "%")).
RunWith(r.stmtCache).Query()
if err != nil && err != sql.ErrNoRows {
return emptyResult, err
} else if err == nil {
for rows.Next() {
var name string
err := rows.Scan(&name)
if err != nil {
rows.Close()
log.Warnf("Error while scanning rows: %v", err)
return emptyResult, err
}
usernames = append(usernames, name)
}
return usernames, nil
}
return emptyResult, ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query Usernames -> %s: Forbidden", user.Name, searchterm)
return emptyResult, ErrForbidden
}
}
func (r *JobRepository) FindUsersByName(ctx context.Context, searchterm string) (usernames []string, err error) {
user := auth.GetUser(ctx)
emptyResult := make([]string, 0)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
rows, err := sq.Select("user.username").Distinct().From("user").
Where("user.name LIKE ?", fmt.Sprint("%", searchterm, "%")).
RunWith(r.stmtCache).Query()
if err != nil && err != sql.ErrNoRows {
return emptyResult, err
} else if err == nil {
for rows.Next() {
var username string
err := rows.Scan(&username)
if err != nil {
rows.Close()
log.Warnf("Error while scanning rows: %v", err)
return emptyResult, err
}
usernames = append(usernames, username)
}
return usernames, nil
}
return emptyResult, ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query name -> %s: Forbidden", user.Name, searchterm)
return emptyResult, ErrForbidden
}
}
func (r *JobRepository) FindNameByUser(ctx context.Context, searchterm string) (name string, err error) {
user := auth.GetUser(ctx)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
err := sq.Select("user.name").Distinct().From("user").
Where("user.username = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&name)
if err != nil && err != sql.ErrNoRows {
return "", err
} else if err == nil {
return name, nil
}
return "", ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query Name -> %s: Forbidden", user.Name, searchterm)
return "", ErrForbidden
}
}
func (r *JobRepository) FindProject(ctx context.Context, searchterm string) (project string, err error) {
user := auth.GetUser(ctx)
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
err := sq.Select("job.project").Distinct().From("job").
Where("job.project = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&project)
if err != nil && err != sql.ErrNoRows {
return "", err
} else if err == nil {
return project, nil
}
return "", ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query Project -> %s: Forbidden", user.Name, project)
return "", ErrForbidden
}
}
func (r *JobRepository) Partitions(cluster string) ([]string, error) {
var err error
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
@@ -466,18 +694,22 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
if err != nil {
return nil, err
}
log.Infof("Timer Partitions %s", time.Since(start))
return partitions.([]string), nil
}
// AllocatedNodes returns a map of all subclusters to a map of hostnames to the amount of jobs running on that host.
// Hosts with zero jobs running on them will not show up!
func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) {
start := time.Now()
subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job").
Where("job.job_state = 'running'").
Where("job.cluster = ?", cluster).
RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
@@ -488,9 +720,11 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
var resources []*schema.Resource
var subcluster string
if err := rows.Scan(&raw, &subcluster); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if err := json.Unmarshal(raw, &resources); err != nil {
log.Warn("Error while unmarshaling raw resources json")
return nil, err
}
@@ -505,10 +739,13 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
}
}
log.Infof("Timer AllocatedNodes %s", time.Since(start))
return subclusters, nil
}
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now()
res, err := sq.Update("job").
Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
Set("duration", 0).
@@ -518,16 +755,243 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)).
RunWith(r.DB).Exec()
if err != nil {
log.Warn("Error while stopping jobs exceeding walltime")
return err
}
rowsAffected, err := res.RowsAffected()
if err != nil {
log.Warn("Error while fetching affected rows after stopping due to exceeded walltime")
return err
}
if rowsAffected > 0 {
log.Warnf("%d jobs have been marked as failed due to running too long", rowsAffected)
log.Infof("%d jobs have been marked as failed due to running too long", rowsAffected)
}
log.Infof("Timer StopJobsExceedingWalltimeBy %s", time.Since(start))
return nil
}
// TODO: Move to config
const ShortJobDuration int = 5 * 60
// GraphQL validation should make sure that no unkown values can be specified.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
}
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
func (r *JobRepository) JobsStatistics(ctx context.Context,
filter []*model.JobFilter,
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
start := time.Now()
// In case `groupBy` is nil (not used), the model.JobsStatistics used is at the key '' (empty string)
stats := map[string]*model.JobsStatistics{}
var castType string
if r.driver == "sqlite3" {
castType = "int"
} else if r.driver == "mysql" {
castType = "unsigned"
}
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
for _, cluster := range archive.Clusters {
for _, subcluster := range cluster.SubClusters {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as %s)", subcluster.SocketsPerNode, subcluster.CoresPerSocket, castType)
var rawQuery sq.SelectBuilder
if groupBy == nil {
rawQuery = sq.Select(
"''",
"COUNT(job.id)",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
corehoursCol,
).From("job")
} else {
col := groupBy2column[*groupBy]
rawQuery = sq.Select(
col,
"COUNT(job.id)",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
corehoursCol,
).From("job").GroupBy(col)
}
rawQuery = rawQuery.
Where("job.cluster = ?", cluster.Name).
Where("job.subcluster = ?", subcluster.Name)
query, qerr := SecurityCheck(ctx, rawQuery)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
log.Warn("Error while querying DB for job statistics")
return nil, err
}
for rows.Next() {
var id sql.NullString
var jobs, walltime, corehours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
if s, ok := stats[id.String]; ok {
s.TotalJobs += int(jobs.Int64)
s.TotalWalltime += int(walltime.Int64)
s.TotalCoreHours += int(corehours.Int64)
} else {
stats[id.String] = &model.JobsStatistics{
ID: id.String,
TotalJobs: int(jobs.Int64),
TotalWalltime: int(walltime.Int64),
TotalCoreHours: int(corehours.Int64),
}
}
}
}
}
}
if groupBy == nil {
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
query, qerr := SecurityCheck(ctx, query)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = BuildWhereClause(f, query)
}
if err := query.RunWith(r.DB).QueryRow().Scan(&(stats[""].ShortJobs)); err != nil {
log.Warn("Error while scanning rows for short job stats")
return nil, err
}
} else {
col := groupBy2column[*groupBy]
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
query, qerr := SecurityCheck(ctx, query)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
log.Warn("Error while querying jobs for short jobs")
return nil, err
}
for rows.Next() {
var id sql.NullString
var shortJobs sql.NullInt64
if err := rows.Scan(&id, &shortJobs); err != nil {
log.Warn("Error while scanning rows for short jobs")
return nil, err
}
if id.Valid {
stats[id.String].ShortJobs = int(shortJobs.Int64)
}
}
}
// Calculating the histogram data is expensive, so only do it if needed.
// An explicit resolver can not be used because we need to know the filters.
histogramsNeeded := false
fields := graphql.CollectFieldsCtx(ctx, nil)
for _, col := range fields {
if col.Name == "histDuration" || col.Name == "histNumNodes" {
histogramsNeeded = true
}
}
res := make([]*model.JobsStatistics, 0, len(stats))
for _, stat := range stats {
res = append(res, stat)
id, col := "", ""
if groupBy != nil {
id = stat.ID
col = groupBy2column[*groupBy]
}
if histogramsNeeded {
var err error
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter, id, col)
if err != nil {
log.Warn("Error while loading job statistics histogram: running jobs")
return nil, err
}
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
if err != nil {
log.Warn("Error while loading job statistics histogram: num nodes")
return nil, err
}
}
}
log.Infof("Timer JobStatistics %s", time.Since(start))
return res, nil
}
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
// to add a condition to the query of the kind "<col> = <id>".
func (r *JobRepository) jobsStatisticsHistogram(ctx context.Context,
value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
start := time.Now()
query := sq.Select(value, "COUNT(job.id) AS count").From("job")
query, qerr := SecurityCheck(ctx, sq.Select(value, "COUNT(job.id) AS count").From("job"))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
query = BuildWhereClause(f, query)
}
if len(id) != 0 && len(col) != 0 {
query = query.Where(col+" = ?", id)
}
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
points := make([]*model.HistoPoint, 0)
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
points = append(points, &point)
}
log.Infof("Timer jobsStatisticsHistogram %s", time.Since(start))
return points, nil
}

View File

@@ -8,10 +8,12 @@ import (
"fmt"
"testing"
"github.com/ClusterCockpit/cc-backend/pkg/log"
_ "github.com/mattn/go-sqlite3"
)
func init() {
log.Init("info", true)
Connect("sqlite3", "../../test/test.db")
}

View File

@@ -0,0 +1,109 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"database/sql"
"embed"
"fmt"
"os"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/golang-migrate/migrate/v4"
"github.com/golang-migrate/migrate/v4/database/mysql"
"github.com/golang-migrate/migrate/v4/database/sqlite3"
"github.com/golang-migrate/migrate/v4/source/iofs"
)
const supportedVersion uint = 2
//go:embed migrations/*
var migrationFiles embed.FS
func checkDBVersion(backend string, db *sql.DB) {
var m *migrate.Migrate
if backend == "sqlite3" {
driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
if err != nil {
log.Fatal(err)
}
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithInstance("iofs", d, "sqlite3", driver)
if err != nil {
log.Fatal(err)
}
} else if backend == "mysql" {
driver, err := mysql.WithInstance(db, &mysql.Config{})
if err != nil {
log.Fatal(err)
}
d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithInstance("iofs", d, "mysql", driver)
if err != nil {
log.Fatal(err)
}
}
v, _, err := m.Version()
if err != nil {
if err == migrate.ErrNilVersion {
log.Warn("Legacy database without version or missing database file!")
} else {
log.Fatal(err)
}
}
if v < supportedVersion {
log.Warnf("Unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend --migrate-db", v, supportedVersion)
os.Exit(0)
}
if v > supportedVersion {
log.Warnf("Unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool!", v, supportedVersion)
os.Exit(0)
}
}
func MigrateDB(backend string, db string) {
var m *migrate.Migrate
if backend == "sqlite3" {
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
if err != nil {
log.Fatal(err)
}
} else if backend == "mysql" {
d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
if err != nil {
log.Fatal(err)
}
}
if err := m.Up(); err != nil {
log.Fatal(err)
}
m.Close()
}

View File

@@ -0,0 +1,5 @@
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tags;
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS configuration;
DROP TABLE IF EXISTS user;

View File

@@ -0,0 +1,62 @@
CREATE TABLE IF NOT EXISTS job (
id INTEGER AUTO_INCREMENT PRIMARY KEY ,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
`partition` VARCHAR(255) NOT NULL,
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
CREATE TABLE IF NOT EXISTS tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
CREATE TABLE IF NOT EXISTS jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
CREATE TABLE IF NOT EXISTS user (
username varchar(255) PRIMARY KEY NOT NULL,
password varchar(255) DEFAULT NULL,
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
name varchar(255) DEFAULT NULL,
roles varchar(255) NOT NULL DEFAULT "[]",
email varchar(255) DEFAULT NULL);

View File

@@ -0,0 +1,5 @@
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_by_state;

View File

@@ -0,0 +1,5 @@
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
CREATE INDEX IF NOT EXISTS job_by_state ON job (job_state);

View File

@@ -0,0 +1,5 @@
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tags;
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS configuration;
DROP TABLE IF EXISTS user;

View File

@@ -0,0 +1,62 @@
CREATE TABLE IF NOT EXISTS job (
id INTEGER PRIMARY KEY,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
partition VARCHAR(255) NOT NULL,
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
CREATE TABLE IF NOT EXISTS tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
CREATE TABLE IF NOT EXISTS jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
CREATE TABLE IF NOT EXISTS user (
username varchar(255) PRIMARY KEY NOT NULL,
password varchar(255) DEFAULT NULL,
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
name varchar(255) DEFAULT NULL,
roles varchar(255) NOT NULL DEFAULT "[]",
email varchar(255) DEFAULT NULL);

View File

@@ -0,0 +1,5 @@
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_by_state;

View File

@@ -0,0 +1,5 @@
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
CREATE INDEX IF NOT EXISTS job_by_state ON job (job_state);

View File

@@ -39,7 +39,7 @@ func (r *JobRepository) QueryJobs(
} else if order.Order == model.SortDirectionEnumDesc {
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
} else {
return nil, errors.New("invalid sorting order")
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
}
}
@@ -54,12 +54,14 @@ func (r *JobRepository) QueryJobs(
sql, args, err := query.ToSql()
if err != nil {
log.Warn("Error while converting query to sql")
return nil, err
}
log.Debugf("SQL query: `%s`, args: %#v", sql, args)
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
@@ -68,6 +70,7 @@ func (r *JobRepository) QueryJobs(
job, err := scanJob(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
@@ -135,6 +138,9 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
if filter.Project != nil {
query = buildStringCondition("job.project", filter.Project, query)
}
if filter.JobName != nil {
query = buildStringCondition("job.meta_data", filter.JobName, query)
}
if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query)
}
@@ -217,6 +223,13 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
if cond.Contains != nil {
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
}
if cond.In != nil {
queryUsers := make([]string, len(cond.In))
for i, val := range cond.In {
queryUsers[i] = val
}
return query.Where(sq.Or{sq.Eq{"job.user": queryUsers}})
}
return query
}
@@ -226,7 +239,7 @@ var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
func toSnakeCase(str string) string {
for _, c := range str {
if c == '\'' || c == '\\' {
panic("A hacker (probably not)!!!")
log.Panic("toSnakeCase() attack vector!")
}
}

View File

@@ -9,22 +9,26 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
sq "github.com/Masterminds/squirrel"
)
// Add the tag with id `tagId` to the job with the database id `jobId`.
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
if _, err := r.stmtCache.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES ($1, $2)`, job, tag); err != nil {
log.Error("Error while running query")
return nil, err
}
j, err := r.FindById(job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
tags, err := r.GetTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
@@ -34,16 +38,19 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
// Removes a tag from a job
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
if _, err := r.stmtCache.Exec("DELETE FROM jobtag WHERE jobtag.job_id = $1 AND jobtag.tag_id = $2", job, tag); err != nil {
log.Error("Error while running query")
return nil, err
}
j, err := r.FindById(job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
tags, err := r.GetTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
@@ -144,6 +151,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
@@ -151,6 +159,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
tags = append(tags, tag)

View File

@@ -6,13 +6,13 @@ package repository
import (
"encoding/json"
"log"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/jmoiron/sqlx"
)
@@ -33,21 +33,9 @@ func GetUserCfgRepo() *UserCfgRepo {
userCfgRepoOnce.Do(func() {
db := GetConnection()
_, err := db.DB.Exec(`
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
if err != nil {
log.Fatal(err)
}
lookupConfigStmt, err := db.DB.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
if err != nil {
log.Fatal(err)
log.Fatalf("db.DB.Preparex() error: %v", err)
}
userCfgRepoInstance = &UserCfgRepo{
@@ -82,6 +70,7 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
rows, err := uCfg.Lookup.Query(user.Username)
if err != nil {
log.Warnf("Error while looking up user config for user '%v'", user.Username)
return err, 0, 0
}
@@ -90,11 +79,13 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
for rows.Next() {
var key, rawval string
if err := rows.Scan(&key, &rawval); err != nil {
log.Warn("Error while scanning user config values")
return err, 0, 0
}
var val interface{}
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
log.Warn("Error while unmarshaling raw user config json")
return err, 0, 0
}
@@ -106,6 +97,7 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
return config, 24 * time.Hour, size
})
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
@@ -122,6 +114,7 @@ func (uCfg *UserCfgRepo) UpdateConfig(
if user == nil {
var val interface{}
if err := json.Unmarshal([]byte(value), &val); err != nil {
log.Warn("Error while unmarshaling raw user config json")
return err
}
@@ -131,8 +124,8 @@ func (uCfg *UserCfgRepo) UpdateConfig(
return nil
}
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
user.Username, key, value); err != nil {
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`, user, key, value); err != nil {
log.Warnf("Error while replacing user config in DB for user '%v'", user)
return err
}

View File

@@ -12,8 +12,8 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -44,7 +44,7 @@ var routes []Route = []Route{
{"/monitoring/user/{id}", "monitoring/user.tmpl", "User <ID> - ClusterCockpit", true, setupUserRoute},
{"/monitoring/systems/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> - ClusterCockpit", false, setupClusterRoute},
{"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute},
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analaysis - ClusterCockpit", true, setupAnalysisRoute},
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analysis - ClusterCockpit", true, setupAnalysisRoute},
{"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterRoute},
}
@@ -61,21 +61,21 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
State: []schema.JobState{schema.JobStateRunning},
}}, nil, nil)
if err != nil {
log.Errorf("failed to count jobs: %s", err.Error())
log.Warnf("failed to count jobs: %s", err.Error())
runningJobs = map[string]int{}
}
totalJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, nil, nil, nil)
if err != nil {
log.Errorf("failed to count jobs: %s", err.Error())
log.Warnf("failed to count jobs: %s", err.Error())
totalJobs = map[string]int{}
}
from := time.Now().Add(-24 * time.Hour)
recentShortJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{
StartTime: &schema.TimeRange{From: &from, To: nil},
Duration: &schema.IntRange{From: 0, To: graph.ShortJobDuration},
Duration: &schema.IntRange{From: 0, To: repository.ShortJobDuration},
}}, nil, nil)
if err != nil {
log.Errorf("failed to count jobs: %s", err.Error())
log.Warnf("failed to count jobs: %s", err.Error())
recentShortJobs = map[string]int{}
}
@@ -158,7 +158,7 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
tags, counts, err := jobRepo.CountTags(username, projects)
tagMap := make(map[string][]map[string]interface{})
if err != nil {
log.Errorf("GetTags failed: %s", err.Error())
log.Warnf("GetTags failed: %s", err.Error())
i["tagmap"] = tagMap
return i
}
@@ -188,9 +188,17 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
filterPresets["project"] = query.Get("project")
filterPresets["projectMatch"] = "eq"
}
if query.Get("user") != "" {
filterPresets["user"] = query.Get("user")
filterPresets["userMatch"] = "eq"
if query.Get("jobName") != "" {
filterPresets["jobName"] = query.Get("jobName")
}
if len(query["user"]) != 0 {
if len(query["user"]) == 1 {
filterPresets["user"] = query.Get("user")
filterPresets["userMatch"] = "contains"
} else {
filterPresets["user"] = query["user"]
filterPresets["userMatch"] = "in"
}
}
if len(query["state"]) != 0 {
filterPresets["state"] = query["state"]
@@ -301,3 +309,81 @@ func SetupRoutes(router *mux.Router, version string, hash string, buildTime stri
})
}
}
func HandleSearchBar(rw http.ResponseWriter, r *http.Request, api *api.RestApi) {
if search := r.URL.Query().Get("searchId"); search != "" {
splitSearch := strings.Split(search, ":")
if len(splitSearch) == 2 {
switch strings.Trim(splitSearch[0], " ") {
case "jobId":
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // All Users: Redirect to Tablequery
return
case "jobName":
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // All Users: Redirect to Tablequery
return
case "projectId":
project, _ := api.JobRepository.FindProject(r.Context(), strings.Trim(splitSearch[1], " ")) // Restricted: projectId
if project != "" {
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(project), http.StatusTemporaryRedirect)
return
} else {
http.Redirect(rw, r, "/monitoring/jobs/?jobId=NotFound", http.StatusTemporaryRedirect) // Workaround to display correctly empty table
}
case "username":
usernames, _ := api.JobRepository.FindUsers(r.Context(), strings.Trim(splitSearch[1], " ")) // Restricted: usernames
if len(usernames) == 1 {
http.Redirect(rw, r, "/monitoring/user/"+usernames[0], http.StatusTemporaryRedirect) // One Match: Redirect to User View
return
} else if len(usernames) > 1 {
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // > 1 Matches: Redirect to user table
return
} else {
http.Redirect(rw, r, "/monitoring/users/?user=NotFound", http.StatusTemporaryRedirect) // Workaround to display correctly empty table
}
case "name":
usernames, _ := api.JobRepository.FindUsersByName(r.Context(), strings.Trim(splitSearch[1], " ")) // Restricted: usernames queried by name
if len(usernames) == 1 {
http.Redirect(rw, r, "/monitoring/user/"+usernames[0], http.StatusTemporaryRedirect)
return
} else if len(usernames) > 1 {
joinedNames := strings.Join(usernames, "&user=")
http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames, http.StatusTemporaryRedirect) // > 1 Matches: Redirect to user table
return
} else {
http.Redirect(rw, r, "/monitoring/users/?user=NotFound", http.StatusTemporaryRedirect) // Workaround to display correctly empty table
}
default:
http.Error(rw, "'searchId' type parameter unknown", http.StatusBadRequest)
}
} else if len(splitSearch) == 1 {
jobname, username, project, err := api.JobRepository.FindJobnameOrUserOrProject(r.Context(), strings.Trim(search, " ")) // Determine Access within
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
if username != "" {
http.Redirect(rw, r, "/monitoring/user/"+username, http.StatusTemporaryRedirect) // User: Redirect to user page
return
} else if project != "" {
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // projectId (equal)
return
} else if jobname != "" {
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // JobName (contains)
return
} else {
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // No Result: Probably jobId
return
}
} else {
http.Error(rw, "'searchId' query parameter malformed", http.StatusBadRequest)
}
} else {
http.Redirect(rw, r, "/monitoring/jobs/?", http.StatusTemporaryRedirect)
}
}

View File

@@ -14,6 +14,8 @@ import (
"strconv"
"strings"
"syscall"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
// Very simple and limited .env file reader.
@@ -22,6 +24,7 @@ import (
func LoadEnv(file string) error {
f, err := os.Open(file)
if err != nil {
log.Error("Error while opening file")
return err
}
@@ -40,14 +43,14 @@ func LoadEnv(file string) error {
line = strings.TrimPrefix(line, "export ")
parts := strings.SplitN(line, "=", 2)
if len(parts) != 2 {
return fmt.Errorf("unsupported line: %#v", line)
return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
}
key := strings.TrimSpace(parts[0])
val := strings.TrimSpace(parts[1])
if strings.HasPrefix(val, "\"") {
if !strings.HasSuffix(val, "\"") {
return fmt.Errorf("unsupported line: %#v", line)
return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
}
runes := []rune(val[1 : len(val)-1])
@@ -65,7 +68,7 @@ func LoadEnv(file string) error {
case '"':
sb.WriteRune('"')
default:
return fmt.Errorf("unsupprorted escape sequence in quoted string: backslash %#v", runes[i])
return fmt.Errorf("RUNTIME/SETUP > unsupported escape sequence in quoted string: backslash %#v", runes[i])
}
continue
}
@@ -89,11 +92,13 @@ func DropPrivileges(username string, group string) error {
if group != "" {
g, err := user.LookupGroup(group)
if err != nil {
log.Warn("Error while looking up group")
return err
}
gid, _ := strconv.Atoi(g.Gid)
if err := syscall.Setgid(gid); err != nil {
log.Warn("Error while setting gid")
return err
}
}
@@ -101,11 +106,13 @@ func DropPrivileges(username string, group string) error {
if username != "" {
u, err := user.Lookup(username)
if err != nil {
log.Warn("Error while looking up user")
return err
}
uid, _ := strconv.Atoi(u.Uid)
if err := syscall.Setuid(uid); err != nil {
log.Warn("Error while setting uid")
return err
}
}