Merge pull request #553 from ClusterCockpit/release/v1.5

Release/v1.5
This commit is contained in:
Jan Eitzinger
2026-06-04 20:33:22 +02:00
committed by GitHub
23 changed files with 3743 additions and 4639 deletions

View File

@@ -170,6 +170,7 @@ func setup(t *testing.T) *api.RestAPI {
archiver.Start(repository.GetJobRepository(), context.Background())
t.Setenv("SESSION_KEY", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=")
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
auth.Init(&cfg)
} else {

View File

@@ -398,11 +398,6 @@ const docTemplate = `{
},
"/api/jobs/edit_meta/": {
"patch": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Edit key value pairs in metadata json of job specified by jobID, StartTime and Cluster\nIf a key already exists its content will be overwritten",
"consumes": [
"application/json"
@@ -413,7 +408,7 @@ const docTemplate = `{
"tags": [
"Job add and modify"
],
"summary": "Edit meta-data json by request",
"summary": "Edit meta-data json of job identified by request",
"parameters": [
{
"description": "Specifies job and payload to add or update",
@@ -456,12 +451,17 @@ const docTemplate = `{
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
},
"security": [
{
"ApiKeyAuth": []
}
]
}
},
"/api/jobs/edit_meta/{id}": {
"patch": {
"description": "Edit key value pairs in job metadata json\nIf a key already exists its content will be overwritten",
"description": "Edit key value pairs in job metadata json of job specified by database id\nIf a key already exists its content will be overwritten",
"consumes": [
"application/json"
],
@@ -471,7 +471,7 @@ const docTemplate = `{
"tags": [
"Job add and modify"
],
"summary": "Edit meta-data json",
"summary": "Edit meta-data json of job identified by database id",
"parameters": [
{
"type": "integer",
@@ -481,7 +481,7 @@ const docTemplate = `{
"required": true
},
{
"description": "Kay value pair to add",
"description": "Metadata Key value pair to add or update",
"name": "request",
"in": "body",
"required": true,
@@ -743,6 +743,64 @@ const docTemplate = `{
]
}
},
"/api/jobs/used_nodes": {
"get": {
"description": "Get a map of cluster names to lists of unique hostnames that are currently in use by running jobs that started before the specified timestamp.",
"produces": [
"application/json"
],
"tags": [
"Job query"
],
"summary": "Lists used nodes by cluster",
"parameters": [
{
"type": "integer",
"description": "Unix timestamp to filter jobs (jobs with start_time \u003c ts)",
"name": "ts",
"in": "query",
"required": true
}
],
"responses": {
"200": {
"description": "Map of cluster names to hostname lists",
"schema": {
"$ref": "#/definitions/api.GetUsedNodesAPIResponse"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
},
"security": [
{
"ApiKeyAuth": []
}
]
}
},
"/api/jobs/{id}": {
"get": {
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.",
@@ -1375,63 +1433,6 @@ const docTemplate = `{
]
}
},
"/healthcheck/": {
"get": {
"description": "This endpoint allows the users to check if a node is healthy",
"produces": [
"application/json"
],
"tags": [
"healthcheck"
],
"summary": "HealthCheck endpoint",
"parameters": [
{
"type": "string",
"description": "Selector",
"name": "selector",
"in": "query"
}
],
"responses": {
"200": {
"description": "Debug dump",
"schema": {
"type": "string"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
},
"security": [
{
"ApiKeyAuth": []
}
]
}
},
"/jobs/tag_job/{id}": {
"delete": {
"description": "Removes tag(s) from a job specified by DB ID. Name and Type of Tag(s) must match.\nTag Scope is required for matching, options: \"global\", \"admin\". Private tags can not be deleted via API.\nIf tagged job is already finished: Tag will be removed from respective archive files.",
@@ -2062,6 +2063,52 @@ const docTemplate = `{
}
}
},
"api.GetUsedNodesAPIResponse": {
"type": "object",
"properties": {
"usedNodes": {
"description": "Map of cluster names to lists of used node hostnames",
"type": "object",
"additionalProperties": {
"type": "array",
"items": {
"type": "string"
}
}
}
}
},
"api.JobMetaRequest": {
"type": "object",
"required": [
"jobId"
],
"properties": {
"cluster": {
"description": "Cluster of job",
"type": "string",
"example": "fritz"
},
"jobId": {
"description": "Cluster Job ID of job",
"type": "integer",
"example": 123000
},
"payload": {
"description": "Content to Add to Job Meta_Data",
"allOf": [
{
"$ref": "#/definitions/api.EditMetaRequest"
}
]
},
"startTime": {
"description": "Start Time of job as epoch",
"type": "integer",
"example": 1649723812
}
}
},
"api.JobMetricWithName": {
"type": "object",
"properties": {
@@ -2194,13 +2241,6 @@ const docTemplate = `{
"format": "float64"
}
},
"exclusive": {
"description": "for backwards compatibility",
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": {
"type": "object",
"additionalProperties": {

View File

@@ -166,6 +166,10 @@ func (api *RestAPI) getJobs(rw http.ResponseWriter, r *http.Request) {
handleError(err, http.StatusBadRequest, rw)
return
}
if x < 1 {
handleError(fmt.Errorf("page must be >= 1"), http.StatusBadRequest, rw)
return
}
page.Page = x
case "items-per-page":
x, err := strconv.Atoi(vals[0])
@@ -173,6 +177,10 @@ func (api *RestAPI) getJobs(rw http.ResponseWriter, r *http.Request) {
handleError(err, http.StatusBadRequest, rw)
return
}
if x < 1 {
handleError(fmt.Errorf("items-per-page must be >= 1"), http.StatusBadRequest, rw)
return
}
page.ItemsPerPage = x
case "with-metadata":
withMetadata = true

View File

@@ -151,7 +151,10 @@ func (api *NatsAPI) StartSubscriptions() error {
return err
}
cclog.Info("NATS API subscriptions started")
cclog.Warnf("NATS API subscriptions started on subjects %q and %q — these are UNAUTHENTICATED: "+
"anyone with publish rights on the broker can start/stop jobs and update node state. "+
"Restrict publish ACLs on the NATS broker to trusted producers only.",
s.SubjectJobEvent, s.SubjectNodeState)
}
return nil
}

View File

@@ -156,6 +156,7 @@ func setupNatsTest(t *testing.T) *NatsAPI {
archiver.Start(repository.GetJobRepository(), context.Background())
t.Setenv("SESSION_KEY", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=")
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
auth.Init(&cfg)
} else {

View File

@@ -9,7 +9,6 @@ package auth
import (
"bytes"
"context"
"crypto/rand"
"database/sql"
"encoding/base64"
"encoding/json"
@@ -17,6 +16,7 @@ import (
"fmt"
"net"
"net/http"
"net/url"
"os"
"sync"
"time"
@@ -187,19 +187,15 @@ func Init(authCfg *json.RawMessage) {
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
cclog.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
cclog.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
cclog.Fatal("Error while initializing authentication -> decoding session key failed")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
cclog.Fatal("environment variable 'SESSION_KEY' not set: refusing to start with an ephemeral session key. " +
"Set SESSION_KEY in .env (base64-encoded 32 random bytes); a random key would invalidate all sessions on every restart " +
"and prevent sessions from validating across replicas.")
}
keyBytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
cclog.Fatal("Error while initializing authentication -> decoding session key failed")
}
authInstance.sessionStore = sessions.NewCookieStore(keyBytes)
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err == nil {
authInstance.SessionMaxAge = d
@@ -325,6 +321,7 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
session.Options.Secure = false
}
session.Options.SameSite = http.SameSiteLaxMode
session.Options.HttpOnly = true
session.Values["username"] = user.Username
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
@@ -388,9 +385,12 @@ func (auth *Authentication) Login(
cclog.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
if r.FormValue("redirect") != "" {
http.RedirectHandler(r.FormValue("redirect"), http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
return
if redirect := r.FormValue("redirect"); redirect != "" {
if u, perr := url.Parse(redirect); perr == nil && u.Scheme == "" && u.Host == "" {
http.RedirectHandler(redirect, http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
return
}
cclog.Warnf("login redirect rejected (not a relative path): %q", redirect)
}
http.RedirectHandler("/", http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))

View File

@@ -9,6 +9,7 @@ import (
"crypto/ed25519"
"encoding/base64"
"errors"
"fmt"
"net/http"
"os"
@@ -119,22 +120,26 @@ func (ja *JWTCookieSessionAuthenticator) Login(
rawtoken = jwtCookie.Value
}
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
if t.Method != jwt.SigningMethodEdDSA {
return nil, errors.New("only Ed25519/EdDSA supported")
}
parser := jwt.NewParser(jwt.WithValidMethods([]string{jwt.SigningMethodEdDSA.Alg()}))
unvalidatedIssuer, success := t.Claims.(jwt.MapClaims)["iss"].(string)
if success && unvalidatedIssuer == jc.TrustedIssuer {
// The (unvalidated) issuer seems to be the expected one,
// use public cross login key from config
return ja.publicKeyCrossLogin, nil
}
unverified, _, perr := parser.ParseUnverified(rawtoken, jwt.MapClaims{})
if perr != nil {
cclog.Warn("JWT cookie session: error while parsing token")
return nil, perr
}
issuer, _ := unverified.Claims.(jwt.MapClaims)["iss"].(string)
// No cross login key configured or issuer not expected
// Try own key
return ja.publicKey, nil
})
var key any
switch issuer {
case jc.TrustedIssuer:
key = ja.publicKeyCrossLogin
case "":
key = ja.publicKey
default:
return nil, fmt.Errorf("untrusted JWT issuer: %q", issuer)
}
token, err := parser.Parse(rawtoken, func(*jwt.Token) (any, error) { return key, nil })
if err != nil {
cclog.Warn("JWT cookie session: error while parsing token")
return nil, err

View File

@@ -25,15 +25,21 @@ type JWTSessionAuthenticator struct {
var _ Authenticator = (*JWTSessionAuthenticator)(nil)
func (ja *JWTSessionAuthenticator) Init() error {
if pubKey := os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
cclog.Warn("Could not decode cross login JWT HS512 key")
return err
}
ja.loginTokenKey = bytes
pubKey := os.Getenv("CROSS_LOGIN_JWT_HS512_KEY")
if pubKey == "" {
// Without a configured key the HMAC verification below would run against
// an empty key, which lets anyone forge a valid token. Refuse to register
// the authenticator in that case so JWT session login is simply disabled.
return errors.New("CROSS_LOGIN_JWT_HS512_KEY not set: JWT session login disabled")
}
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
cclog.Warn("Could not decode cross login JWT HS512 key")
return err
}
ja.loginTokenKey = bytes
cclog.Info("JWT Session authenticator successfully registered")
return nil
}
@@ -60,6 +66,12 @@ func (ja *JWTSessionAuthenticator) Login(
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
// Defense in depth: an empty key would verify any HMAC signature.
// Init() already refuses to register without a key, so this should
// never trigger, but guard explicitly rather than trust the chain.
if len(ja.loginTokenKey) == 0 {
return nil, errors.New("HS login key not configured")
}
return ja.loginTokenKey, nil
}
return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@ package graph
// This file will be automatically regenerated based on the schema, any resolver
// implementations
// will be copied through when generating and any unknown code will be moved to the end.
// Code generated by github.com/99designs/gqlgen version v0.17.88
// Code generated by github.com/99designs/gqlgen version v0.17.90
import (
"context"
@@ -1072,12 +1072,10 @@ func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
// SubCluster returns generated.SubClusterResolver implementation.
func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }
type (
clusterResolver struct{ *Resolver }
jobResolver struct{ *Resolver }
metricValueResolver struct{ *Resolver }
mutationResolver struct{ *Resolver }
nodeResolver struct{ *Resolver }
queryResolver struct{ *Resolver }
subClusterResolver struct{ *Resolver }
)
type clusterResolver struct{ *Resolver }
type jobResolver struct{ *Resolver }
type metricValueResolver struct{ *Resolver }
type mutationResolver struct{ *Resolver }
type nodeResolver struct{ *Resolver }
type queryResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver }

View File

@@ -76,8 +76,15 @@ func (r *JobRepository) QueryJobs(
}
if page != nil && page.ItemsPerPage != -1 {
// -1 is the only valid non-positive value ("load all"); reject other
// non-positive values so that uint64(page.ItemsPerPage) cannot underflow
// into a huge limit. Clamp Page to >= 1 to avoid the same on the offset.
if page.ItemsPerPage < 1 {
return nil, fmt.Errorf("invalid items-per-page value: %d", page.ItemsPerPage)
}
p := max(page.Page, 1)
limit := uint64(page.ItemsPerPage)
query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
query = query.Offset((uint64(p) - 1) * limit).Limit(limit)
}
for _, f := range filters {
@@ -336,8 +343,18 @@ func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBui
}
}
// validMetricName guards metric/footprint names that are interpolated into the
// json_extract() path of footprint queries. SQLite treats double-quoted strings
// as string literals, so an unescaped name (e.g. containing a `"`) would allow
// SQL injection. Legitimate metric names only use these characters.
var validMetricName = regexp.MustCompile(`^[a-zA-Z0-9_]+$`)
// buildFloatJSONCondition creates a filter on a numeric field within the footprint JSON column, using BETWEEN only if required.
func buildFloatJSONCondition(jsonField string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
if !validMetricName.MatchString(jsonField) {
cclog.Warnf("buildFloatJSONCondition: rejecting invalid metric name %q", jsonField)
return query.Where("0 = 1")
}
query = query.Where("JSON_VALID(footprint)")
if cond.From > 0.0 && cond.To > 0.0 {
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") BETWEEN ? AND ?", cond.From, cond.To)

View File

@@ -909,6 +909,13 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
filters []*model.JobFilter,
bins *int,
) (*model.MetricHistoPoints, error) {
// The metric name is interpolated into the json_extract() path of the SQL
// below. SQLite parses double-quoted strings as literals, so reject anything
// that is not a plain metric identifier to prevent SQL injection.
if !validMetricName.MatchString(metric) {
return nil, fmt.Errorf("invalid metric name: %q", metric)
}
// Peak value defines the upper bound for binning: values are distributed across
// bins from 0 to peak. First try to get peak from filtered cluster, otherwise
// scan all clusters to find the maximum peak value.

View File

@@ -311,26 +311,33 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
LeftJoin("jobtag jt ON t.id = jt.tag_id").
GroupBy("t.tag_type, t.tag_name")
// Build scope list for filtering
var scopeBuilder strings.Builder
scopeBuilder.WriteString(`"global"`)
// Build scope list for filtering. Values are parameterized rather than
// interpolated because user.Username originates from external identity
// providers (OIDC/LDAP) and must not be trusted as SQL.
scopes := []string{"global"}
if user != nil {
scopeBuilder.WriteString(`,"`)
scopeBuilder.WriteString(user.Username)
scopeBuilder.WriteString(`"`)
scopes = append(scopes, user.Username)
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
scopeBuilder.WriteString(`,"admin"`)
scopes = append(scopes, "admin")
}
}
q = q.Where("t.tag_scope IN (" + scopeBuilder.String() + ")")
q = q.Where(sq.Eq{"t.tag_scope": scopes})
// Handle Job Ownership
if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
// cclog.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
// Unchanged: Needs to be own case still, due to UserRole/NoRole compatibility handling in else case
} else if user != nil && user.HasRole(schema.RoleManager) { // MANAGER: Count own jobs plus project's jobs
// Build ("project1", "project2", ...) list of variable length directly in SQL string
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
} else if user != nil && user.HasRole(schema.RoleManager) && len(user.Projects) > 0 { // MANAGER: Count own jobs plus project's jobs
// Build a parameterized ("?", "?", ...) placeholder list for the
// variable-length project set instead of interpolating values into SQL.
args := make([]any, 0, len(user.Projects)+1)
args = append(args, user.Username)
placeholders := make([]string, len(user.Projects))
for i, p := range user.Projects {
placeholders[i] = "?"
args = append(args, p)
}
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ? OR job.project IN ("+strings.Join(placeholders, ",")+"))", args...)
} else if user != nil { // USER OR NO ROLE (Compatibility): Only count own jobs
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ?)", user.Username)
}