diff --git a/Makefile b/Makefile
index 505281c..3d44172 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ TARGET = ./cc-backend
 VAR = ./var
 CFG = config.json .env
 FRONTEND = ./web/frontend
-VERSION = 1.1.0
+VERSION = 1.2.0
 GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
 CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
 LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index d66a27f..c10ecd5 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -7,12 +7,12 @@ implementation of ClusterCockpit.
 
 ** Breaking changes **
 
-The LDAP configuration option user_filter was changed and now should not include
-the wildcard. Example:
-* Old: `"user_filter": "(&(objectclass=posixAccount)(uid=*))"`
-* New: `"user_filter": "&(objectclass=posixAccount)"`
+* The LDAP configuration option `user_filter` was changed and should no longer
+  include the `uid` wildcard. Example:
+  - Old: `"user_filter": "(&(objectclass=posixAccount)(uid=*))"`
+  - New: `"user_filter": "(&(objectclass=posixAccount))"`
 
-The aggregate job statistic core hours is now computed using the job table
+* The aggregate job statistic core hours is now computed using the job table
 column `num_hwthreads`. In a future release this column will be renamed to
 `num_cores`. For correct display of core hours `num_hwthreads` must be correctly
 filled on job start. If your existing jobs do not provide the correct value in
@@ -21,6 +21,10 @@ if you have exclusive jobs, only. Please be aware that we treat this column as
 it is the number of cores. In case you have SMT enabled and `num_hwthreads` is
 not the number of cores the core hours will be too high by a factor!
 
+* The `jwts` key is now mandatory in config.json. Its `max-age` option must be
+  set to define how long tokens remain valid. Some key names have changed;
+  please refer to the [config documentation](./configs/README.md) for details.
+
 ** NOTE **
 If you are using the sqlite3 backend the `PRAGMA` option `foreign_keys` must be
 explicitly set to ON. If using the sqlite3 console it is per default set to
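For reference, a minimal `config.json` fragment reflecting both breaking changes above might look as follows; a sketch only, and the `max-age` value is an arbitrary example, not a default:

```json
{
    "ldap": {
        "user_filter": "(&(objectclass=posixAccount))"
    },
    "jwts": {
        "max-age": "2000h"
    }
}
```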
diff --git a/configs/README.md b/configs/README.md
index 944d0d3..76029d1 100644
--- a/configs/README.md
+++ b/configs/README.md
@@ -16,26 +16,41 @@ It is supported to set these by means of a `.env` file in the project root.
 * `static-files`: Type string. Folder where static assets can be found, if `embed-static-files` is `false`. No default.
 * `db-driver`: Type string. 'sqlite3' or 'mysql' (mysql will work for mariadb as well). Default `sqlite3`.
 * `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`.
-* `job-archive`: Type string. Path to the job-archive. Default: `./var/job-archive`.
+* `job-archive`: Type object.
+  - `kind`: Type string. At the moment only `file` is supported as value.
+  - `path`: Type string. Path to the job-archive. Default: `./var/job-archive`.
+  - `compression`: Type integer. Set up automatic compression for jobs older than the given number of days.
+  - `retention`: Type object.
+    - `policy`: Type string (required). Retention policy. Possible values `none`, `delete`,
+      `move`.
+    - `includeDB`: Type boolean. Also remove jobs from the database.
+    - `age`: Type integer. Act on jobs with a startTime older than `age` (in days).
+    - `location`: Type string. The target directory for retention. Only applicable for retention policy `move`.
 * `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`.
 * `validate`: Type bool. Validate all input json documents against json schema.
 * `session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`.
-* `jwt-max-age`: Type string. Specifies for how long a JWT token shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `0`.
 * `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates.
 * `redirect-http-to`: Type string. If not the empty string and `addr` does not end in ":80", redirect every request incoming at port 80 to that url.
 * `machine-state-dir`: Type string. Where to store MachineState files. TODO: Explain in more detail!
 * `stop-jobs-exceeding-walltime`: Type int. If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job. Default `0`.
 * `short-running-jobs-duration`: Type int. Do not show running jobs shorter than X seconds. Default `300`.
+* `jwts`: Type object (required). For JWT authentication.
+  - `max-age`: Type string (required). Configures how long a token is valid, as a string parsable by time.ParseDuration().
+  - `cookieName`: Type string. Cookie that should be checked for a JWT token.
+  - `validateUser`: Type boolean. Deny login for users not in the database (even if defined in the JWT). Overwrite roles in the JWT with database roles.
+  - `trustedIssuer`: Type string. Issuer that should be accepted when validating external JWTs.
+  - `syncUserOnLogin`: Type boolean. Add a non-existent user to the DB at login attempt, with values provided in the JWT.
 * `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`.
-  - `url`: Type string. URL of LDAP directory server.
-  - `user_base`: Type string. Base DN of user tree root.
-  - `search_dn`: Type string. DN for authenticating LDAP admin account with general read rights.
-  - `user_bind`: Type string. Expression used to authenticate users via LDAP bind. Must contain `uid={username}`.
-  - `user_filter`: Type string. Filter to extract users for syncing.
+  - `url`: Type string (required). URL of LDAP directory server.
+  - `user_base`: Type string (required). Base DN of user tree root.
+  - `search_dn`: Type string (required). DN for authenticating LDAP admin account with general read rights.
+  - `user_bind`: Type string (required). Expression used to authenticate users via LDAP bind. Must contain `uid={username}`.
+  - `user_filter`: Type string (required). Filter to extract users for syncing.
   - `username_attr`: Type string. Attribute with full user name. Defaults to `gecos` if not provided.
   - `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.
-  - `sync_del_old_users`: Type bool. Delete obsolete users in database.
+  - `sync_del_old_users`: Type boolean. Delete obsolete users in database.
+  - `syncUserOnLogin`: Type boolean. Add a non-existent user to the DB at login attempt if the user exists in the LDAP directory.
-* `clusters`: Type array of objects
+* `clusters`: Type array of objects (required)
   - `name`: Type string. The name of the cluster.
   - `metricDataRepository`: Type object with properties: `kind` (Type string, can be one of `cc-metric-store`, `influxdb` ), `url` (Type string), `token` (Type string)
   - `filterRanges` Type object. This option controls the slider ranges for the UI controls of numNodes, duration, and startTime. Example:
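For illustration, a `job-archive` object using the keys documented above could look like this; the `compression`, `age`, and `location` values are made-up example choices, not defaults:

```json
{
    "job-archive": {
        "kind": "file",
        "path": "./var/job-archive",
        "compression": 7,
        "retention": {
            "policy": "move",
            "includeDB": true,
            "age": 365,
            "location": "/data/archive-retired"
        }
    }
}
```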
diff --git a/configs/config.json b/configs/config.json
index 3a2c64b..fbc3343 100644
--- a/configs/config.json
+++ b/configs/config.json
@@ -5,7 +5,7 @@
         "user_base": "ou=people,ou=hpc,dc=test,dc=de",
         "search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de",
         "user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de",
-        "user_filter": "(&(objectclass=posixAccount)(uid=*))"
+        "user_filter": "(&(objectclass=posixAccount))"
     },
     "https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
     "https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
diff --git a/internal/graph/util.go b/internal/graph/util.go
index b61bcc7..3a2c3b1 100644
--- a/internal/graph/util.go
+++ b/internal/graph/util.go
@@ -6,7 +6,6 @@ package graph
 
 import (
     "context"
-    "errors"
     "fmt"
     "math"
 
@@ -33,7 +32,7 @@ func (r *queryResolver) rooflineHeatmap(
         return nil, err
     }
     if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
-        return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
+        return nil, fmt.Errorf("GRAPH/UTIL > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
     }
 
     fcols, frows := float64(cols), float64(rows)
@@ -50,20 +49,24 @@
 
         jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
         if err != nil {
-            log.Error("Error while loading metrics for roofline")
+            log.Errorf("Error while loading roofline metrics for job %d", job.ID)
             return nil, err
         }
 
         flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
         if flops_ == nil && membw_ == nil {
-            return nil, fmt.Errorf("GRAPH/STATS > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
+            log.Infof("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID)
+            continue
+            // return nil, fmt.Errorf("GRAPH/UTIL > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
         }
 
         flops, ok1 := flops_["node"]
         membw, ok2 := membw_["node"]
         if !ok1 || !ok2 {
+            log.Info("rooflineHeatmap() query not implemented where flops_any or mem_bw not available at 'node' level")
+            continue
             // TODO/FIXME:
-            return nil, errors.New("GRAPH/STATS > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
+            // return nil, errors.New("GRAPH/UTIL > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
         }
 
         for n := 0; n < len(flops.Series); n++ {
@@ -99,7 +102,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
         return nil, err
     }
     if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
-        return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
+        return nil, fmt.Errorf("GRAPH/UTIL > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
     }
 
     avgs := make([][]schema.Float, len(metrics))
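Both the resolver changes above and the `LoadStats` changes below swap hard errors for a log-and-skip strategy, so a single job or query with missing or broken metrics no longer fails the whole request. A minimal, self-contained sketch of the pattern (the `process` function and data are hypothetical, not code from this repository):

```go
package main

import (
	"fmt"
	"log"
)

// process stands in for a per-item operation that may fail,
// e.g. loading metrics for one job.
func process(item int) (int, error) {
	if item%2 != 0 {
		return 0, fmt.Errorf("item %d has no usable data", item)
	}
	return item * item, nil
}

func main() {
	results := []int{}
	for _, item := range []int{1, 2, 3, 4} {
		res, err := process(item)
		if err != nil {
			// Log and skip this item instead of aborting the whole
			// loop, mirroring the continue statements in the diffs.
			log.Printf("skipping: %s", err)
			continue
		}
		results = append(results, res)
	}
	fmt.Println(results) // [4 16]
}
```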
%s", metric, query.Hostname, *data.Error) + log.Infof("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) + continue + // return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) } metricdata, ok := stats[metric] @@ -543,7 +545,9 @@ func (ccms *CCMetricStore) LoadStats( } if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() { - return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN") + log.Infof("fetching %s for node %s failed: one of avg/min/max is NaN", metric, query.Hostname) + continue + // return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN") } metricdata[query.Hostname] = schema.MetricStatistics{ diff --git a/web/frontend/src/utils.js b/web/frontend/src/utils.js index 5e9cdae..0650916 100644 --- a/web/frontend/src/utils.js +++ b/web/frontend/src/utils.js @@ -350,31 +350,16 @@ export function binsFromFootprint(weights, scope, values, numBins) { scopeWeights = weights.nodeHours } - const bins = new Array(numBins).fill(0) + const rawBins = new Array(numBins).fill(0) for (let i = 0; i < values.length; i++) - bins[Math.floor(((values[i] - min) / (max - min)) * numBins)] += scopeWeights ? scopeWeights[i] : 1 + rawBins[Math.floor(((values[i] - min) / (max - min)) * numBins)] += scopeWeights ? scopeWeights[i] : 1 - // Manual Canvas Original - // return { - // label: idx => { - // let start = min + (idx / numBins) * (max - min) - // let stop = min + ((idx + 1) / numBins) * (max - min) - // return `${formatNumber(start)} - ${formatNumber(stop)}` - // }, - // bins: bins.map((count, idx) => ({ value: idx, count: count })), - // min: min, - // max: max - // } + const bins = rawBins.map((count, idx) => ({ + value: Math.floor(min + ((idx + 1) / numBins) * (max - min)), + count: count + })) return { - bins: bins.map((count, idx) => ({ - value: idx => { // Use bins' max value instead of mean - // let start = min + (idx / numBins) * (max - min) - let stop = min + ((idx + 1) / numBins) * (max - min) - // return `${formatNumber(Math.floor((start+stop)/2))}` - return Math.floor(stop) - }, - count: count - })) + bins: bins } }