diff --git a/Makefile b/Makefile index 77c3a83..91d3549 100644 --- a/Makefile +++ b/Makefile @@ -76,90 +76,3 @@ config.json: $(SVELTE_TARGETS): $(SVELTE_SRC) $(info ===> BUILD frontend) cd web/frontend && npm install && npm run build - -install: $(TARGET) - @WORKSPACE=$(PREFIX) - @if [ -z "$${WORKSPACE}" ]; then exit 1; fi - @mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR) - @install -Dpm 755 $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET) - @install -Dpm 600 configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json - -.ONESHELL: -.PHONY: RPM -RPM: build/package/cc-backend.spec - @WORKSPACE="$${PWD}" - @SPECFILE="$${WORKSPACE}/build/package/cc-backend.spec" - # Setup RPM build tree - @eval $$(rpm --eval "ARCH='%{_arch}' RPMDIR='%{_rpmdir}' SOURCEDIR='%{_sourcedir}' SPECDIR='%{_specdir}' SRPMDIR='%{_srcrpmdir}' BUILDDIR='%{_builddir}'") - @mkdir --parents --verbose "$${RPMDIR}" "$${SOURCEDIR}" "$${SPECDIR}" "$${SRPMDIR}" "$${BUILDDIR}" - # Create source tarball - @COMMITISH="HEAD" - @VERS=$$(git describe --tags $${COMMITISH}) - @VERS=$${VERS#v} - @VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g) - @if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi - @eval $$(rpmspec --query --queryformat "NAME='%{name}' VERSION='%{version}' RELEASE='%{release}' NVR='%{NVR}' NVRA='%{NVRA}'" --define="VERS $${VERS}" "$${SPECFILE}") - @PREFIX="$${NAME}-$${VERSION}" - @FORMAT="tar.gz" - @SRCFILE="$${SOURCEDIR}/$${PREFIX}.$${FORMAT}" - @git archive --verbose --format "$${FORMAT}" --prefix="$${PREFIX}/" --output="$${SRCFILE}" $${COMMITISH} - # Build RPM and SRPM - @rpmbuild -ba --define="VERS $${VERS}" --rmsource --clean "$${SPECFILE}" - # Report RPMs and SRPMs when in GitHub Workflow - @if [ "$${GITHUB_ACTIONS}" = true ]; then - @ RPMFILE="$${RPMDIR}/$${ARCH}/$${NVRA}.rpm" - @ SRPMFILE="$${SRPMDIR}/$${NVR}.src.rpm" - @ echo "RPM: $${RPMFILE}" - @ echo "SRPM: $${SRPMFILE}" - @ echo "::set-output name=SRPM::$${SRPMFILE}" - @ echo "::set-output name=RPM::$${RPMFILE}" - @fi - 
-.ONESHELL: -.PHONY: DEB -DEB: build/package/cc-backend.deb.control - @BASEDIR=$${PWD} - @WORKSPACE=$${PWD}/.dpkgbuild - @DEBIANDIR=$${WORKSPACE}/debian - @DEBIANBINDIR=$${WORKSPACE}/DEBIAN - @mkdir --parents --verbose $$WORKSPACE $$DEBIANBINDIR - #@mkdir --parents --verbose $$DEBIANDIR - @CONTROLFILE="$${BASEDIR}/build/package/cc-backend.deb.control" - @COMMITISH="HEAD" - @VERS=$$(git describe --tags --abbrev=0 $${COMMITISH}) - @VERS=$${VERS#v} - @VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g) - @if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi - @ARCH=$$(uname -m) - @ARCH=$$(echo $$ARCH | sed -e s+'_'+'-'+g) - @if [ "$${ARCH}" = "x86-64" ]; then ARCH=amd64; fi - @PREFIX="$${NAME}-$${VERSION}_$${ARCH}" - @SIZE_BYTES=$$(du -bcs --exclude=.dpkgbuild "$${WORKSPACE}"/ | awk '{print $$1}' | head -1 | sed -e 's/^0\+//') - @SIZE="$$(awk -v size="$$SIZE_BYTES" 'BEGIN {print (size/1024)+1}' | awk '{print int($$0)}')" - #@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANDIR}/control - @sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANBINDIR}/control - @mkdir --parents --verbose "$${WORKSPACE}"/$(VAR) - @touch "$${WORKSPACE}"/$(VAR)/job.db - @cd web/frontend && yarn install && yarn build && cd - - @go build -ldflags=${LD_FLAGS} ./cmd/cc-backend - @mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR) - @cp $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET) - @chmod 0755 $${WORKSPACE}/usr/$(BINDIR)/$(TARGET) - @mkdir --parents --verbose $${WORKSPACE}/etc/$(TARGET) - @cp configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json - @chmod 0600 $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json - @mkdir --parents --verbose $${WORKSPACE}/usr/lib/systemd/system - @cp build/package/$(TARGET).service $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service - @chmod 0644 $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service - @mkdir --parents --verbose 
$${WORKSPACE}/etc/default - @cp build/package/$(TARGET).config $${WORKSPACE}/etc/default/$(TARGET) - @chmod 0600 $${WORKSPACE}/etc/default/$(TARGET) - @mkdir --parents --verbose $${WORKSPACE}/usr/lib/sysusers.d - @cp build/package/$(TARGET).sysusers $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf - @chmod 0644 $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf - @DEB_FILE="cc-metric-store_$${VERS}_$${ARCH}.deb" - @dpkg-deb -b $${WORKSPACE} "$$DEB_FILE" - @rm -r "$${WORKSPACE}" - @if [ "$${GITHUB_ACTIONS}" = "true" ]; then - @ echo "::set-output name=DEB::$${DEB_FILE}" - @fi diff --git a/README.md b/README.md index 36d3c7d..5ce9125 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,16 @@ # NOTE -Please have a look at the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for breaking changes! + +Please have a look at the [Release +Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) +for breaking changes! # ClusterCockpit REST and GraphQL API backend [![Build](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml/badge.svg)](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml) This is a Golang backend implementation for a REST and GraphQL API according to -the [ClusterCockpit specifications](https://github.com/ClusterCockpit/cc-specifications). It also +the [ClusterCockpit +specifications](https://github.com/ClusterCockpit/cc-specifications). It also includes a web interface for ClusterCockpit. This implementation replaces the previous PHP Symfony based ClusterCockpit web interface. The reasons for switching from PHP Symfony to a Golang based solution are explained @@ -14,31 +18,31 @@ switching from PHP Symfony to a Golang based solution are explained ## Overview +This is a Golang web backend for the ClusterCockpit job-specific performance +monitoring framework. 
It provides a REST API for integrating ClusterCockpit with +an HPC cluster batch system and external analysis scripts. Data exchange between +the web front-end and the back-end is based on a GraphQL API. The web frontend +is also served by the backend using [Svelte](https://svelte.dev/) components. +Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using +[Bootstrap Icons](https://icons.getbootstrap.com/). -This is a Golang web backend for the ClusterCockpit job-specific performance monitoring framework. -It provides a REST API for integrating ClusterCockpit with an HPC cluster batch system and external analysis scripts. -Data exchange between the web front-end and the back-end is based on a GraphQL API. -The web frontend is also served by the backend using [Svelte](https://svelte.dev/) components. -Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using [Bootstrap Icons](https://icons.getbootstrap.com/). - -The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by default. -Optionally it can use a MySQL/MariaDB database server. -While there are metric data backends for the InfluxDB and Prometheus time series databases, the only tested and supported setup is to use cc-metric-store as the metric data backend. -Documentation on how to integrate ClusterCockpit with other time series databases will be added in the future. +The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by +default. Optionally it can use a MySQL/MariaDB database server. While there are +metric data backends for the InfluxDB and Prometheus time series databases, the +only tested and supported setup is to use cc-metric-store as the metric data +backend. Documentation on how to integrate ClusterCockpit with other time series +databases will be added in the future. 
Completed batch jobs are stored in a file-based job archive according to -[this specification] (https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). +[this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). The backend supports authentication via local accounts, an external LDAP directory, and JWT tokens. Authorization for APIs is implemented with [JWT](https://jwt.io/) tokens created with public/private key encryption. -You find more detailed information here: -* `./configs/README.md`: Infos about configuration and setup of cc-backend. -* `./init/README.md`: Infos on how to setup cc-backend as systemd service on Linux. -* `./tools/README.md`: Infos on the JWT authorizatin token workflows in ClusterCockpit. -* `./docs`: You can find further documentation here. There is also a Hands-on tutorial that is recommended to get familiar with the ClusterCockpit setup. +You can find detailed documentation on the [ClusterCockpit +Webpage](https://clustercockpit.org). -**NOTE** +## Build requirements ClusterCockpit requires a current version of the golang toolchain and node.js. You can check `go.mod` to see what is the current minimal golang version needed. @@ -49,7 +53,7 @@ on the Go standard library, it is crucial for security and performance to use a current version of golang. In addition, an old golang toolchain may limit the supported versions of third-party packages. -## How to try ClusterCockpit with a demo setup. +## How to try ClusterCockpit with a demo setup We provide a shell script that downloads demo data and automatically starts the cc-backend. You will need `wget`, `go`, `node`, `npm` in your path to @@ -64,28 +68,32 @@ cd ./cc-backend You can also try the demo using the lates release binary. Create a folder and put the release binary `cc-backend` into this folder. 
Execute the following steps: -``` -$ ./cc-backend -init -$ vim config.json (Add a second cluster entry and name the clusters alex and fritz) -$ wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar -$ tar xf job-archive-demo.tar -$ ./cc-backend -init-db -add-user demo:admin:demo -loglevel info -$ ./cc-backend -server -dev -loglevel info + +``` shell +./cc-backend -init +vim config.json (Add a second cluster entry and name the clusters alex and fritz) +wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar +tar xf job-archive-demo.tar +./cc-backend -init-db -add-user demo:admin:demo -loglevel info +./cc-backend -server -dev -loglevel info ``` -You can access the web interface at http://localhost:8080. +You can access the web interface at [http://localhost:8080](http://localhost:8080). Credentials for login are `demo:demo`. Please note that some views do not work without a metric backend (e.g., the Analysis, Systems and Status views). -## Howto build and run +## How to build and run -There is a Makefile to automate the build of cc-backend. The Makefile supports the following targets: -* `$ make`: Initialize `var` directory and build svelte frontend and backend binary. Note that there is no proper prerequesite handling. Any change of frontend source files will result in a complete rebuild. -* `$ make clean`: Clean go build cache and remove binary. -* `$ make test`: Run the tests that are also run in the GitHub workflow setup. +There is a Makefile to automate the build of cc-backend. The Makefile supports +the following targets: + +* `make`: Initialize `var` directory and build svelte frontend and backend binary. Note that there is no proper prerequisite handling. Any change of frontend source files will result in a complete rebuild. +* `make clean`: Clean go build cache and remove binary. 
+* `make test`: Run the tests that are also run in the GitHub workflow setup. A common workflow for setting up cc-backend from scratch is: + ```sh git clone https://github.com/ClusterCockpit/cc-backend.git @@ -116,89 +124,45 @@ ln -s ./var/job-archive ./cc-backend -help ``` -### Run as systemd daemon - -To run this program as a daemon, cc-backend comes with a [example systemd setup](./init/README.md). - -## Configuration and setup - -cc-backend can be used as a local web interface for an existing job archive or -as a server for the ClusterCockpit monitoring framework. - -Create your job archive according to [this specification] (https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). -At least one cluster directory with a valid `cluster.json` file is required. If -you configure the job archive from scratch, you must also create the job -archive version file that contains the job archive version as an integer. -You can retrieve the currently supported version by running the following -command: -``` -$ ./cc-backend -version -``` -It is ok to have no jobs in the job archive. - -### Configuration - -A configuration file in JSON format must be specified with `-config` to override the default settings. -By default, a `config.json` file located in the current directory of the `cc-backend` process will be loaded even without the `-config` flag. -Documentation of all supported configuration and command line options can be found [here](./configs/README.md). - -## Database initialization and migration - -Each `cc-backend` version supports a specific database version. -At startup, the version of the sqlite database is checked and `cc-backend` terminates if the version does not match. -`cc-backend` supports the migration of the database schema to the required version with the command line option `-migrate-db`. -If the database file does not exist yet, it will be created and initialized with the command line option `-migrate-db`. 
-If you want to use a newer database version with an older version of cc-backend, you can downgrade a database with the external tool [migrate](https://github.com/golang-migrate/migrate). -In this case, you must specify the path to the migration files in a current source tree: `./internal/repository/migrations/`. - -## Development and testing -When making changes to the REST or GraphQL API, the appropriate code generators must be used. -You must always rebuild `cc-backend` after updating the API files. - -### Update GraphQL schema - -This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL API. -The schema can be found in `./api/schema.graphqls`. -After changing it, you need to run `go run github.com/99designs/gqlgen`, which will update `./internal/graph/model`. -If new resolvers are needed, they will be added to `./internal/graph/schema.resolvers.go`, where you will then need to implement them. -If you start `cc-backend` with the `-dev` flag, the GraphQL Playground UI is available at http://localhost:8080/playground. - -### Update Swagger UI - -This project integrates [swagger ui] (https://swagger.io/tools/swagger-ui/) to document and test its REST API. -The swagger documentation files can be found in `./api/`. -You can generate the swagger-ui configuration by running `go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api `. -You need to move the created `./api/docs.go` to `./internal/api/docs.go`. -If you start cc-backend with the `-dev` flag, the Swagger interface is available -at http://localhost:8080/swagger/. -You must enter a JWT key for a user with the API role. - -**NOTE** - -The user who owns the JWT key must not be logged into the same browser (have a -running session), or the Swagger requests will not work. It is recommended to -create a separate user that has only the API role. 
- -## Development and testing -In case the REST or GraphQL API is changed the according code generators have to be used. - ## Project file structure -- [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api) contains the API schema files for the REST and GraphQL APIs. The REST API is documented in the OpenAPI 3.0 format in [./api/openapi.yaml](./api/openapi.yaml). -- [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend) contains `main.go` for the main application. -- [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs) contains documentation about configuration and command line options and required environment variables. A sample configuration file is provided. -- [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs) contains more in-depth documentation. -- [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init) contains an example of setting up systemd for production use. -- [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal) contains library source code that is not intended for use by others. -- [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg) contains Go packages that can be used by other projects. -- [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools) Additional command line helper tools. - - [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager) Commands for getting infos about and existing job archive. - - [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration) Tool to migrate from previous to current job archive version. - - [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey) Tool to convert external pubkey for use in `cc-backend`. 
- - [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair) contains a small application to generate a compatible JWT keypair. You find documentation on how to use it [here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md). -- [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web) Server-side templates and frontend-related files: - - [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend) Svelte components and static assets for the frontend UI - - [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates) Server-side Go templates -- [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml) Configures the behaviour and generation of [gqlgen](https://github.com/99designs/gqlgen). -- [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh) is a shell script that sets up demo data, and builds and starts `cc-backend`. - +* [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api) +contains the API schema files for the REST and GraphQL APIs. The REST API is +documented in the OpenAPI 3.0 format in +[./api/openapi.yaml](./api/openapi.yaml). +* [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend) +contains `main.go` for the main application. +* [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs) +contains documentation about configuration and command line options and required +environment variables. A sample configuration file is provided. +* [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs) +contains more in-depth documentation. +* [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init) +contains an example of setting up systemd for production use. 
+* [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal) +contains library source code that is not intended for use by others. +* [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg) +contains Go packages that can be used by other projects. +* [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools) +Additional command line helper tools. + * [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager) + Commands for getting information about an existing job archive. + * [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration) + Tool to migrate from previous to current job archive version. + * [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey) + Tool to convert external pubkey for use in `cc-backend`. + * [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair) + contains a small application to generate a compatible JWT keypair. You can find + documentation on how to use it + [here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md). +* [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web) +Server-side templates and frontend-related files: + * [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend) + Svelte components and static assets for the frontend UI + * [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates) + Server-side Go templates +* [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml) +Configures the behaviour and generation of +[gqlgen](https://github.com/99designs/gqlgen). +* [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh) +is a shell script that sets up demo data, and builds and starts `cc-backend`. 
diff --git a/build/package/cc-backend.config b/build/package/cc-backend.config deleted file mode 100644 index e80285b..0000000 --- a/build/package/cc-backend.config +++ /dev/null @@ -1,17 +0,0 @@ -CC_USER=clustercockpit - -CC_GROUP=clustercockpit - -CC_HOME=/tmp - -LOG_DIR=/var/log - -DATA_DIR=/var/run/cc-backend - -MAX_OPEN_FILES=10000 - -CONF_DIR=/etc/cc-backend - -CONF_FILE=/etc/cc-backend/cc-backend.json - -RESTART_ON_UPGRADE=true diff --git a/build/package/cc-backend.deb.control b/build/package/cc-backend.deb.control deleted file mode 100644 index 1612c12..0000000 --- a/build/package/cc-backend.deb.control +++ /dev/null @@ -1,12 +0,0 @@ -Package: cc-backend -Version: {VERSION} -Installed-Size: {INSTALLED_SIZE} -Architecture: {ARCH} -Maintainer: thomas.gruber@fau.de -Depends: libc6 (>= 2.2.1) -Build-Depends: debhelper-compat (= 13), git, golang-go, npm, yarn -Description: ClusterCockpit backend and web frontend -Homepage: https://github.com/ClusterCockpit/cc-backend -Source: cc-backend -Rules-Requires-Root: no - diff --git a/build/package/cc-backend.service b/build/package/cc-backend.service deleted file mode 100644 index e29c2e7..0000000 --- a/build/package/cc-backend.service +++ /dev/null @@ -1,18 +0,0 @@ -[Unit] -Description=ClusterCockpit backend and web frontend (cc-backend) -Documentation=https://github.com/ClusterCockpit/cc-backend -Wants=network-online.target -After=network-online.target - -[Service] -EnvironmentFile=/etc/default/cc-backend -Type=simple -User=clustercockpit -Group=clustercockpit -Restart=on-failure -TimeoutStopSec=100 -LimitNOFILE=infinity -ExecStart=/usr/bin/cc-backend --config ${CONF_FILE} - -[Install] -WantedBy=multi-user.target diff --git a/build/package/cc-backend.spec b/build/package/cc-backend.spec deleted file mode 100644 index d96d94e..0000000 --- a/build/package/cc-backend.spec +++ /dev/null @@ -1,70 +0,0 @@ -Name: cc-backend -Version: %{VERS} -Release: 1%{?dist} -Summary: ClusterCockpit backend and web frontend - -License: 
MIT -Source0: %{name}-%{version}.tar.gz - -#BuildRequires: go-toolset -#BuildRequires: systemd-rpm-macros -#BuildRequires: npm - -Provides: %{name} = %{version} - -%description -ClusterCockpit backend and web frontend - -%global debug_package %{nil} - -%prep -%autosetup - - -%build -#CURRENT_TIME=$(date +%Y-%m-%d:T%H:%M:\%S) -#LD_FLAGS="-s -X main.buildTime=${CURRENT_TIME} -X main.version=%{VERS}" -mkdir ./var -touch ./var/job.db -cd web/frontend && yarn install && yarn build && cd - -go build -ldflags="-s -X main.version=%{VERS}" ./cmd/cc-backend - - -%install -# Install cc-backend -#make PREFIX=%{buildroot} install -install -Dpm 755 cc-backend %{buildroot}/%{_bindir}/%{name} -install -Dpm 0600 configs/config.json %{buildroot}%{_sysconfdir}/%{name}/%{name}.json -# Integrate into system -install -Dpm 0644 build/package/%{name}.service %{buildroot}%{_unitdir}/%{name}.service -install -Dpm 0600 build/package/%{name}.config %{buildroot}%{_sysconfdir}/default/%{name} -install -Dpm 0644 build/package/%{name}.sysusers %{buildroot}%{_sysusersdir}/%{name}.conf - - -%check -# go test should be here... 
:) - -%pre -%sysusers_create_package scripts/%{name}.sysusers - -%post -%systemd_post %{name}.service - -%preun -%systemd_preun %{name}.service - -%files -# Binary -%attr(-,clustercockpit,clustercockpit) %{_bindir}/%{name} -# Config -%dir %{_sysconfdir}/%{name} -%attr(0600,clustercockpit,clustercockpit) %config(noreplace) %{_sysconfdir}/%{name}/%{name}.json -# Systemd -%{_unitdir}/%{name}.service -%{_sysconfdir}/default/%{name} -%{_sysusersdir}/%{name}.conf - -%changelog -* Mon Mar 07 2022 Thomas Gruber - 0.1 -- Initial metric store implementation - diff --git a/build/package/cc-backend.sysusers b/build/package/cc-backend.sysusers deleted file mode 100644 index 5d4abc5..0000000 --- a/build/package/cc-backend.sysusers +++ /dev/null @@ -1,2 +0,0 @@ -#Type Name ID GECOS Home directory Shell -u clustercockpit - "User for ClusterCockpit" /run/cc-backend /sbin/nologin diff --git a/configs/README.md b/configs/README.md deleted file mode 100644 index 1ee8cb8..0000000 --- a/configs/README.md +++ /dev/null @@ -1,93 +0,0 @@ -## Intro - -cc-backend requires a configuration file that specifies the cluster systems to be used. -To override the default, specify the location of a json configuration file with the `-config ` command line option. -All security-related configurations, e.g. keys and passwords, are set using -environment variables. -It is supported to set these by means of a `.env` file in the project root. - -## Configuration Options - -* `addr`: Type string. Address where the http (or https) server will listen on (for example: 'localhost:80'). Default `:8080`. -* `apiAllowedIPs`: Type string array. Addresses from which the secured API endpoints (/users and other auth related endpoints) can be reached -* `user`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port. -* `group`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port. 
-* `disable-authentication`: Type bool. Disable authentication (for everything: API, Web-UI, ...). Default `false`. -* `embed-static-files`: Type bool. If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not. Default `true`. -* `static-files`: Type string. Folder where static assets can be found, if `embed-static-files` is `false`. No default. -* `db-driver`: Type string. 'sqlite3' or 'mysql' (mysql will work for mariadb as well). Default `sqlite3`. -* `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`. -* `job-archive`: Type object. - - `kind`: Type string. At them moment only file is supported as value. - - `path`: Type string. Path to the job-archive. Default: `./var/job-archive`. - - `compression`: Type integer. Setup automatic compression for jobs older than number of days. - - `retention`: Type object. - - `policy`: Type string (required). Retention policy. Possible values none, delete, - move. - - `includeDB`: Type boolean. Also remove jobs from database. - - `age`: Type integer. Act on jobs with startTime older than age (in days). - - `location`: Type string. The target directory for retention. Only applicable for retention policy move. -* `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`. -* `validate`: Type bool. Validate all input json documents against json schema. -* `session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`. -* `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates. -* `redirect-http-to`: Type string. 
If not the empty string and `addr` does not end in ":80", redirect every request incoming at port 80 to that url. -* `machine-state-dir`: Type string. Where to store MachineState files. TODO: Explain in more detail! -* `stop-jobs-exceeding-walltime`: Type int. If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job. Default `0`. -* `short-running-jobs-duration`: Type int. Do not show running jobs shorter than X seconds. Default `300`. -* `jwts`: Type object (required). For JWT Authentication. - - `max-age`: Type string (required). Configure how long a token is valid. As string parsable by time.ParseDuration(). - - `cookieName`: Type string. Cookie that should be checked for a JWT token. - - `vaidateUser`: Type boolean. Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles. - - `trustedIssuer`: Type string. Issuer that should be accepted when validating external JWTs. - - `syncUserOnLogin`: Type boolean. Add non-existent user to DB at login attempt with values provided in JWT. -* `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`. - - `url`: Type string (required). URL of LDAP directory server. - - `user_base`: Type string (required). Base DN of user tree root. - - `search_dn`: Type string (required). DN for authenticating LDAP admin account with general read rights. - - `user_bind`: Type string (required). Expression used to authenticate users via LDAP bind. Must contain `uid={username}`. - - `user_filter`: Type string (required). Filter to extract users for syncing. - - `username_attr`: Type string. Attribute with full user name. Defaults to `gecos` if not provided. - - `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration. - - `sync_del_old_users`: Type boolean. Delete obsolete users in database. - - `syncUserOnLogin`: Type boolean. 
Add non-existent user to DB at login attempt if user exists in Ldap directory. -* `clusters`: Type array of objects (required) - - `name`: Type string. The name of the cluster. - - `metricDataRepository`: Type object with properties: `kind` (Type string, can be one of `cc-metric-store`, `influxdb` ), `url` (Type string), `token` (Type string) - - `filterRanges` Type object. This option controls the slider ranges for the UI controls of numNodes, duration, and startTime. Example: - ``` - "filterRanges": { - "numNodes": { "from": 1, "to": 64 }, - "duration": { "from": 0, "to": 86400 }, - "startTime": { "from": "2022-01-01T00:00:00Z", "to": null } - } - ``` -* `ui-defaults`: Type object. Default configuration for ui views. If overwritten, all options must be provided! Most options can be overwritten by the user via the web interface. - - `analysis_view_histogramMetrics`: Type string array. Metrics to show as job count histograms in analysis view. Default `["flops_any", "mem_bw", "mem_used"]`. - - `analysis_view_scatterPlotMetrics`: Type array of string array. Initial - scatter plot configuration in analysis view. Default `[["flops_any", "mem_bw"], ["flops_any", "cpu_load"], ["cpu_load", "mem_bw"]]`. - - `job_view_nodestats_selectedMetrics`: Type string array. Initial metrics shown in node statistics table of single job view. Default `["flops_any", "mem_bw", "mem_used"]`. - - `job_view_polarPlotMetrics`: Type string array. Metrics shown in polar plot of single job view. Default `["flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"]`. - - `job_view_selectedMetrics`: Type string array. Default `["flops_any", "mem_bw", "mem_used"]`. - - `plot_general_colorBackground`: Type bool. Color plot background according to job average threshold limits. Default `true`. - - `plot_general_colorscheme`: Type string array. Initial color scheme. Default `"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"`. - - `plot_general_lineWidth`: Type int. 
Initial linewidth. Default `3`. - - `plot_list_jobsPerPage`: Type int. Jobs shown per page in job lists. Default `50`. - - `plot_list_selectedMetrics`: Type string array. Initial metric plots shown in jobs lists. Default `"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"`. - - `plot_view_plotsPerRow`: Type int. Number of plots per row in single job view. Default `3`. - - `plot_view_showPolarplot`: Type bool. Option to toggle polar plot in single job view. Default `true`. - - `plot_view_showRoofline`: Type bool. Option to toggle roofline plot in single job view. Default `true`. - - `plot_view_showStatTable`: Type bool. Option to toggle the node statistic table in single job view. Default `true`. - - `system_view_selectedMetric`: Type string. Initial metric shown in system view. Default `cpu_load`. - -Some of the `ui-defaults` values can be appended by `:` in order to have different settings depending on the current cluster. Those are notably `job_view_nodestats_selectedMetrics`, `job_view_polarPlotMetrics`, `job_view_selectedMetrics` and `plot_list_selectedMetrics`. - -## Environment Variables - -An example env file is found in this directory. Copy it to `.env` in the project root and adapt it for your needs. - -* `JWT_PUBLIC_KEY` and `JWT_PRIVATE_KEY`: Base64 encoded Ed25519 keys used for JSON Web Token (JWT) authentication. You can generate your own keypair using `go run ./cmd/gen-keypair/gen-keypair.go`. More information in [README_TOKENS.md](./README_TOKENS.md). -* `SESSION_KEY`: Some random bytes used as secret for cookie-based sessions. -* `LDAP_ADMIN_PASSWORD`: The LDAP admin user password (optional). -* `CROSS_LOGIN_JWT_HS512_KEY`: Used for token based logins via another authentication service. -* `LOGLEVEL`: Can be `err`, `warn`, `info` or `debug` (optional, `warn` by default). Can be used to reduce logging. 
diff --git a/configs/README_TOKENS.md b/configs/README_TOKENS.md deleted file mode 100644 index be8a912..0000000 --- a/configs/README_TOKENS.md +++ /dev/null @@ -1,51 +0,0 @@ -## Introduction - -ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs. -JSON Web Token (JWT) is an open standard (RFC 7519) that defines a compact and self-contained way for securely transmitting information between parties as a JSON object. -This information can be verified and trusted because it is digitally signed. -In ClusterCockpit JWTs are signed using a public/private key pair using EdDSA (Ed25519). -Because tokens are signed using public/private key pairs, the signature also certifies that only the party holding the private key is the one that signed it. -Expiration of the generated tokens as well as the max. length of a browser session can be configured in the `config.json` file described [here](./README.md). - -The [Ed25519](https://ed25519.cr.yp.to/) algorithm for signatures was used because it is compatible with other tools that require authentication, such as NATS.io, and because these elliptic-curve methods provide similar security with smaller keys compared to something like RSA. They are slightly more expensive to validate, but that effect is negligible. - -## JWT Payload - -You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io). -Currently ClusterCockpit sets the following claims: -* `iat`: Issued at claim. The “iat” claim is used to identify the time at which the JWT was issued. This claim can be used to determine the age of the JWT. -* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username. -* `roles`: An array of strings specifying the roles set for the subject. -* `exp`: Expiration date of the token (only if explicitly configured) - -It is important to know that JWTs are not encrypted, only signed. 
This means that outsiders cannot create new JWTs or modify existing ones, but they are able to read out the username. - -## Workflow - -1. Create a new Ed25519 public/private keypair: -``` -$ go build ./cmd/gen-keypair/ -$ ./gen-keypair -``` -2. Add keypair in your `.env` file. A template can be found in `./configs`. - -When a user logs in via the `/login` page using a browser, a session cookie (secured using the random bytes in the `SESSION_KEY` env. variable you should change as well) is used for all requests after the successful login. The JWTs make it easier to use the APIs of ClusterCockpit using scripts or other external programs. The token is specified in the `Authorization` HTTP header using the [Bearer scheme](https://datatracker.ietf.org/doc/html/rfc6750) (there is an example below). Tokens can be issued to users from the configuration view in the Web-UI or the command line. In order to use the token for API endpoints such as `/api/jobs/start_job/`, the user that executes it needs to have the `api` role. Regular users can only perform read-only queries and only look at data connected to jobs they started themselves. - -## cc-metric-store - -The [cc-metric-store](https://github.com/ClusterCockpit/cc-metric-store) also uses JWTs for authentication. As it does not issue new tokens, it does not need to know the private key. The public key of the keypair that is used to generate the JWTs that grant access to the `cc-metric-store` can be specified in its `config.json`. When configuring the `metricDataRepository` object in the `cluster.json` file, you can put a token issued by ClusterCockpit itself. - -## Setup user and JWT token for REST API authorization - -1. Create user: -``` -$ ./cc-backend --add-user <username>:api:<password> --no-server -``` -2. Issue token for user: -``` -$ ./cc-backend --jwt <username> --no-server -``` -3. 
Use issued token on client side: -``` -$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>" -``` diff --git a/docs/ConfigurationManagement.md b/docs/ConfigurationManagement.md deleted file mode 100644 index 4340741..0000000 --- a/docs/ConfigurationManagement.md +++ /dev/null @@ -1,38 +0,0 @@ -# Release versions - -Versions are marked according to [semantic versioning](https://semver.org). -Each version embeds the following static assets in the binary: -* Web frontend with javascript files and all static assets. -* Golang template files for server-side rendering. -* JSON schema files for validation. -* Database migration files. - -The remaining external assets are: -* The SQL database used. -* The job archive -* The configuration files `config.json` and `.env`. - -The external assets are versioned with integer IDs. -This means that each release binary is bound to specific versions of the SQL -database and the job archive. -The configuration file is checked against the current schema at startup. -The `-migrate-db` command line switch can be used to migrate the SQL database -from a previous version to the latest one. -We offer a separate tool `archive-migration` to migrate an existing job archive -from the previous to the latest version. - -# Versioning of APIs - -cc-backend provides two API backends: -* A REST API for querying jobs. -* A GraphQL API for data exchange between web frontend and cc-backend. - -The REST API will also be versioned. We still have to decide whether we will also -support older REST API versions by versioning the endpoint URLs. -The GraphQL API is for internal use and will not be versioned. - -# How to build - -In general it is recommended to use the provided release binary. -In case you want to build `cc-backend` please always use the provided makefile. 
This will ensure -that the frontend is also built correctly and that the version in the binary is encoded in the binary. diff --git a/docs/Hands-on.md b/docs/Hands-on.md deleted file mode 100644 index 7f9d2f4..0000000 --- a/docs/Hands-on.md +++ /dev/null @@ -1,234 +0,0 @@ -# Hands-on setup ClusterCockpit from scratch (w/o docker) - -## Prerequisites -* perl -* go -* npm -* Optional: curl -* Script migrateTimestamp.pl - -## Documentation -You find READMEs or api docs in -* ./cc-backend/configs -* ./cc-backend/init -* ./cc-backend/api - -## ClusterCockpit configuration files -### cc-backend -* `./.env` Passwords and Tokens set in the environment -* `./config.json` Configuration options for cc-backend - -### cc-metric-store -* `./config.json` Optional to overwrite configuration options - -### cc-metric-collector -Not yet included in the hands-on setup. - -## Setup Components -Start by creating a base folder for all of the following steps. -* `mkdir clustercockpit` -* `cd clustercockpit` - -### Setup cc-backend -* Clone Repository - - `git clone https://github.com/ClusterCockpit/cc-backend.git` - - `cd cc-backend` -* Build - - `make` -* Activate & configure environment for cc-backend - - `cp configs/env-template.txt .env` - - Optional: Have a look via `vim .env` - - Copy the `config.json` file included in this tarball into the root directory of cc-backend: `cp ../../config.json ./` -* Back to toplevel `clustercockpit` - - `cd ..` -* Prepare Datafolder and Database file - - `mkdir var` - - `./cc-backend -migrate-db` - -### Setup cc-metric-store -* Clone Repository - - `git clone https://github.com/ClusterCockpit/cc-metric-store.git` - - `cd cc-metric-store` -* Build Go Executable - - `go get` - - `go build` -* Prepare Datafolders - - `mkdir -p var/checkpoints` - - `mkdir -p var/archive` -* Update Config - - `vim config.json` - - Exchange existing setting in `metrics` with the following: -``` -"clock": { "frequency": 60, "aggregation": null }, -"cpi": { "frequency": 60, 
"aggregation": null }, -"cpu_load": { "frequency": 60, "aggregation": null }, -"flops_any": { "frequency": 60, "aggregation": null }, -"flops_dp": { "frequency": 60, "aggregation": null }, -"flops_sp": { "frequency": 60, "aggregation": null }, -"ib_bw": { "frequency": 60, "aggregation": null }, -"lustre_bw": { "frequency": 60, "aggregation": null }, -"mem_bw": { "frequency": 60, "aggregation": null }, -"mem_used": { "frequency": 60, "aggregation": null }, -"rapl_power": { "frequency": 60, "aggregation": null } -``` -* Back to toplevel `clustercockpit` - - `cd ..` - -### Setup Demo Data -* `mkdir source-data` -* `cd source-data` -* Download JobArchive-Source: - - `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar.xz` - - `tar xJf job-archive-dev.tar.xz` - - `mv ./job-archive ./job-archive-source` - - `rm ./job-archive-dev.tar.xz` -* Download CC-Metric-Store Checkpoints: - - `mkdir -p cc-metric-store-source/checkpoints` - - `cd cc-metric-store-source/checkpoints` - - `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/cc-metric-store-checkpoints.tar.xz` - - `tar xf cc-metric-store-checkpoints.tar.xz` - - `rm cc-metric-store-checkpoints.tar.xz` -* Back to `source-data` - - `cd ../..` -* Run timestamp migration script. This may take tens of minutes! 
- - `cp ../migrateTimestamps.pl .` - - `./migrateTimestamps.pl` - - Expected output: -``` -Starting to update start- and stoptimes in job-archive for emmy -Starting to update start- and stoptimes in job-archive for woody -Done for job-archive -Starting to update checkpoint filenames and data starttimes for emmy -Starting to update checkpoint filenames and data starttimes for woody -Done for checkpoints -``` -* Copy `cluster.json` files from source to migrated folders - - `cp source-data/job-archive-source/emmy/cluster.json cc-backend/var/job-archive/emmy/` - - `cp source-data/job-archive-source/woody/cluster.json cc-backend/var/job-archive/woody/` -* Initialize Job-Archive in SQLite3 job.db and add demo user - - `cd cc-backend` - - `./cc-backend -init-db -add-user demo:admin:demo` - - Expected output: -``` -<6>[INFO] new user "demo" created (roles: ["admin"], auth-source: 0) -<6>[INFO] Building job table... -<6>[INFO] A total of 3936 jobs have been registered in 1.791 seconds. -``` -* Back to toplevel `clustercockpit` - - `cd ..` - -### Startup both Apps -* In cc-backend root: `$./cc-backend -server -dev` - - Starts Clustercockpit at `http:localhost:8080` - - Log: `<6>[INFO] HTTP server listening at :8080...` - - Use local internet browser to access interface - - You should see and be able to browse finished Jobs - - Metadata is read from SQLite3 database - - Metricdata is read from job-archive/JSON-Files - - Create User in settings (top-right corner) - - Name `apiuser` - - Username `apiuser` - - Role `API` - - Submit & Refresh Page - - Create JTW for `apiuser` - - In Userlist, press `Gen. 
JWT` for `apiuser` - - Save JWT for later use -* In cc-metric-store root: `$./cc-metric-store` - - Start the cc-metric-store on `http://localhost:8081`, Log: -``` -2022/07/15 17:17:42 Loading checkpoints newer than 2022-07-13T17:17:42+02:00 -2022/07/15 17:17:45 Checkpoints loaded (5621 files, 319 MB, that took 3.034652s) -2022/07/15 17:17:45 API http endpoint listening on '0.0.0.0:8081' -``` - - Does *not* have a graphical interface - - Optional: Test function by executing: -``` -$ curl -H "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" -D - "http://localhost:8081/api/query" -d "{ \"cluster\": \"emmy\", \"from\": $(expr $(date +%s) - 60), \"to\": $(date +%s), \"queries\": [{ - \"metric\": \"flops_any\", - \"host\": \"e1111\" -}] }" - -HTTP/1.1 200 OK -Content-Type: application/json -Date: Fri, 15 Jul 2022 13:57:22 GMT -Content-Length: 119 -{"results":[[JSON-DATA-ARRAY]]} -``` - -### Development API web interfaces -The `-dev` flag enables web interfaces to document and test the APIs: -* http://localhost:8080/playground - A GraphQL playground. To use it you must have an authenticated session in the same browser. -* http://localhost:8080/swagger - A Swagger UI. To use it you have to be logged out, so no user session in the same browser. Use the JWT token with role Api generated previously to authenticate via http header. - -### Use cc-backend API to start job -* Enter the URL `http://localhost:8080/swagger/index.html` in your browser. -* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window. -* Click the `/job/start_job` endpoint and click the Try it out button. 
-* Enter the following json into the request body text area and fill in a recent start timestamp by executing `date +%s`.: -``` -{ - "jobId": 100000, - "arrayJobId": 0, - "user": "ccdemouser", - "subCluster": "main", - "cluster": "emmy", - "startTime": , - "project": "ccdemoproject", - "resources": [ - {"hostname": "e0601"}, - {"hostname": "e0823"}, - {"hostname": "e0337"}, - {"hostname": "e1111"}], - "numNodes": 4, - "numHwthreads": 80, - "walltime": 86400 -} -``` -* The response body should be the database id of the started job, for example: -``` -{ - "id": 3937 -} -``` -* Check in ClusterCockpit - - User `ccdemouser` should appear in Users-Tab with one running job - - It could take up to 5 Minutes until the Job is displayed with some current data (5 Min Short-Job Filter) - - Job then is marked with a green `running` tag - - Metricdata displayed is read from cc-metric-store! - - -### Use cc-backend API to stop job -* Enter the URL `http://localhost:8080/swagger/index.html` in your browser. -* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window. -* Click the `/job/stop_job/{id}` endpoint and click the Try it out button. -* Enter the database id at id that was returned by `start_job` and copy the following into the request body. Replace the timestamp with a recent one: -``` -{ - "cluster": "emmy", - "jobState": "completed", - "stopTime": -} -``` -* On success a json document with the job meta data is returned. - -* Check in ClusterCockpit - - User `ccdemouser` should appear in Users-Tab with one completed job - - Job is no longer marked with a green `running` tag -> Completed! - - Metricdata displayed is now read from job-archive! 
-* Check in job-archive - - `cd ./cc-backend/var/job-archive/emmy/100/000` - - `cd $STARTTIME` - - Inspect `meta.json` and `data.json` - -## Helper scripts -* In this tarball you can find the perl script `generate_subcluster.pl` that helps to generate the subcluster section for your system. -Usage: -* Log into an exclusive cluster node. -* The LIKWID tools likwid-topology and likwid-bench must be in the PATH! -* `$./generate_subcluster.pl` outputs the subcluster section on `stdout` - -Please be aware that -* You have to enter the name and node list for the subCluster manually. -* GPU detection only works if LIKWID was built with CUDA available and you run likwid-topology also with CUDA loaded. -* Do not blindly trust the measured peakflops values. -* Because the script blindly relies on the CSV format output by likwid-topology this is a fragile undertaking! diff --git a/docs/JWT-Handling.md b/docs/JWT-Handling.md deleted file mode 100644 index 8b8d000..0000000 --- a/docs/JWT-Handling.md +++ /dev/null @@ -1,99 +0,0 @@ -## Introduction - -ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs. JSON -Web Token (JWT) is an open standard (RFC 7519) that defines a compact and -self-contained way for securely transmitting information between parties as a -JSON object. This information can be verified and trusted because it is -digitally signed. In ClusterCockpit JWTs are signed using a public/private key -pair using EdDSA (Ed25519). Because tokens are signed using public/private key pairs, the -signature also certifies that only the party holding the private key is the one -that signed it. Token expiration is set to the configuration option MaxAge. - -## JWT Payload - -You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io). -Currently ClusterCockpit sets the following claims: -* `iat`: Issued at claim. The “iat” claim is used to identify the time at which the JWT was issued. 
This claim can be used to determine the age of the JWT. -* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username. -* `roles`: An array of strings specifying the roles set for the subject. - -## Workflow - -1. Create a new ECDSA Public/private keypair: -``` -$ go build ./tools/gen-keypair.go -$ ./gen-keypair -``` -2. Add keypair in your `.env` file. A template can be found in `./configs`. - -There are two usage scenarios: -* The APIs are used during a browser session. API accesses are authorized with - the active session. -* The REST API is used outside a browser session, e.g. by scripts. In this case - you have to issue a token manually. This possible from within the - configuration view or on the command line. It is recommended to issue a JWT - token in this case for a special user that only has the `api` role. By using - different users for different purposes a fine grained access control and - access revocation management is possible. - -The token is commonly specified in the Authorization HTTP header using the Bearer schema. - -## Setup user and JWT token for REST API authorization - -1. Create user: -``` -$ ./cc-backend --add-user :api: --no-server -``` -2. Issue token for user: -``` -$ ./cc-backend -jwt -no-server -``` -3. Use issued token token on client side: -``` -$ curl -X GET "" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer " -``` - -## Accept externally generated JWTs provided via cookie -If there is an external service like an AuthAPI that can generate JWTs and hand -them over to ClusterCockpit via cookies, CC can be configured to accept them: - -1. `.env`: CC needs a public ed25519 key to verify foreign JWT signatures. - Public keys in PEM format can be converted with the instructions in - [/tools/convert-pem-pubkey-for-cc](../tools/convert-pem-pubkey-for-cc/Readme.md) - . - -``` -CROSS_LOGIN_JWT_PUBLIC_KEY="+51iXX8BdLFocrppRxIw52xCOf8xFSH/eNilN5IHVGc=" -``` - -2. 
`config.json`: Insert a name for the cookie (set by the external service) - containing the JWT so that CC knows where to look at. Define a trusted issuer - (JWT claim 'iss'), otherwise it will be rejected. If you want usernames and - user roles from JWTs ('sub' and 'roles' claim) to be validated against CC's - internal database, you need to enable it here. Unknown users will then be - rejected and roles set via JWT will be ignored. - -```json -"jwts": { - "cookieName": "access_cc", - "forceJWTValidationViaDatabase": true, - "trustedExternalIssuer": "auth.example.com" -} -``` - -3. Make sure your external service includes the same issuer (`iss`) in its JWTs. - Example JWT payload: - -```json -{ - "iat": 1668161471, - "nbf": 1668161471, - "exp": 1668161531, - "sub": "alice", - "roles": [ - "user" - ], - "jti": "a1b2c3d4-1234-5678-abcd-a1b2c3d4e5f6", - "iss": "auth.example.com" -} -``` diff --git a/docs/Job-Archive.md b/docs/Job-Archive.md deleted file mode 100644 index 601f32d..0000000 --- a/docs/Job-Archive.md +++ /dev/null @@ -1,78 +0,0 @@ -The job archive specifies an exchange format for job meta and performance metric -data. It consists of two parts: -* a [SQLite database schema](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#sqlite-database-schema) for job meta data and performance statistics -* a [Json file format](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#json-file-format) together with a [Directory hierarchy specification](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#directory-hierarchy-specification) - -By using an open, portable and simple specification based on files it is -possible to exchange job performance data for research and analysis purposes as -well as use it as a robust way for archiving job performance data to disk. - -# SQLite database schema -## Introduction - -A SQLite 3 database schema is provided to standardize the job meta data -information in a portable way. 
The schema also includes optional columns for job -performance statistics (called a job performance footprint). The database acts -as a front end to filter and select subsets of job IDs, that are the keys to get -the full job performance data in the job performance tree hierarchy. - -## Database schema - -The schema includes 3 tables: the job table, a tag table and a jobtag table -representing the MANY-TO-MANY relation between jobs and tags. The SQL schema is -specified -[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/schemas/jobs-sqlite.sql). -Explanation of the various columns including the JSON datatypes is documented -[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/job-meta.schema.json). - -# Directory hierarchy specification - -## Specification - -To manage the number of directories within a single directory a tree approach is -used splitting the integer job ID. The job id is split in chunks of 1000 each. -Usually 2 layers of directories are sufficient but the concept can be used for an -arbitrary number of layers. - -For a 2 layer schema this can be achieved with (code example in Perl): -``` perl -$level1 = $jobID/1000; -$level2 = $jobID%1000; -$dstPath = sprintf("%s/%s/%d/%03d", $trunk, $destdir, $level1, $level2); -``` - -## Example - -For the job ID 1034871 the directory path is `./1034/871/`. - -# Json file format -## Overview - -Every cluster must be configured in a `cluster.json` file. - -The job data consists of two files: -* `meta.json`: Contains job meta information and job statistics. -* `data.json`: Contains complete job data with time series - -The description of the json format specification is available as a -[json schema](https://json-schema.org/) format file. The latest version of the json -schema is part of the `cc-backend` source tree. For external reference it is -also available in a separate repository. 
- -## Specification `cluster.json` - -The json schema specification is available -[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/cluster.schema.json). - -## Specification `meta.json` - -The json schema specification is available -[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-meta.schema.json). - -## Specification `data.json` - -The json schema specification is available -[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-data.schema.json). -Metric time series data is stored for a fixed time step. The time step is set -per metric. If no value is available for a metric time series data timestamp -`null` is entered. diff --git a/docs/adm-customization.md b/docs/adm-customization.md deleted file mode 100644 index da2c7eb..0000000 --- a/docs/adm-customization.md +++ /dev/null @@ -1,29 +0,0 @@ -# Overview - -Customizing `cc-backend` means changing the logo, legal texts, and the login -template instead of the placeholders. You can also place a text file in `./var` -to add dynamic status or notification messages to the ClusterCockpit homepage. - -# Replace legal texts - -To replace the `imprint.tmpl` and `privacy.tmpl` legal texts, you can place your -version in `./var/`. At startup `cc-backend` will check if `./var/imprint.tmpl` and/or -`./var/privacy.tmpl` exist and use them instead of the built-in placeholders. -You can use the placeholders in `web/templates` as a blueprint. - -# Replace login template -To replace the default login layout and styling, you can place your version in -`./var/`. At startup `cc-backend` will check if `./var/login.tmpl` exists and use -it instead of the built-in placeholder. You can use the default template -`web/templates/login.tmpl` as a blueprint. - -# Replace logo -To change the logo displayed in the navigation bar, you can provide the file -`logo.png` in the folder `./var/img/`. 
On startup `cc-backend` will check if the -folder exists and use the images provided there instead of the built-in images. -You may also place additional images there you use in a custom login template. - -# Add notification banner on homepage -To add a notification banner you can add a file `notice.txt` to `./var`. As long -as this file is present all text in this file is shown in an info banner on the -homepage. diff --git a/docs/adm-upgrade.md b/docs/adm-upgrade.md deleted file mode 100644 index bfe2933..0000000 --- a/docs/adm-upgrade.md +++ /dev/null @@ -1,78 +0,0 @@ -In general, an upgrade is nothing more than a replacement of the binary file. -All the necessary files, except the database file, the configuration file and -the job archive, are embedded in the binary file. It is recommended to use a -directory where the file names of the binary files are named with a version -indicator. This can be, for example, the date or the Unix epoch time. A symbolic -link points to the version to be used. This makes it easier to switch to earlier -versions. - -The database and the job archive are versioned. Each release binary supports -specific versions of the database and job archive. If a version mismatch is -detected, the application is terminated and migration is required. - -**IMPORTANT NOTE** - -It is recommended to make a backup copy of the database before each update. This -is mandatory in case the database needs to be migrated. In the case of sqlite, -this means to stopping `cc-backend` and copying the sqlite database file -somewhere. - -# Migrating the database - -After you have backed up the database, run the following command to migrate the -database to the latest version: -``` -$ ./cc-backend -migrate-db -``` - -The migration files are embedded in the binary and can also be viewed in the cc -backend [source tree](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository/migrations). 
-There are separate migration files for both supported -database backends. -We use the [migrate library](https://github.com/golang-migrate/migrate). - -If something goes wrong, you can check the status and get the current schema -(here for sqlite): -``` -$ sqlite3 var/job.db -``` -In the sqlite console execute: -``` -.schema -``` -to get the current database schema. -You can query the current version and whether the migration failed with: -``` -SELECT * FROM schema_migrations; -``` -The first column indicates the current database version and the second column is -a dirty flag indicating whether the migration was successful. - -# Migrating the job archive - -Job archive migration requires a separate tool (`archive-migration`), which is -part of the cc-backend source tree (build with `go build ./tools/archive-migration`) -and is also provided as part of the releases. - -Migration is supported only between two successive releases. The migration tool -migrates the existing job archive to a new job archive. This means that there -must be enough disk space for two complete job archives. If the tool is called -without options: -``` -$ ./archive-migration -``` - -it is assumed that a job archive exists in `./var/job-archive`. The new job -archive is written to `./var/job-archive-new`. Since execution is threaded, in case -of a fatal error it is impossible to determine in which job the error occurred. -In this case, you can run the tool in debug mode (with the `-debug` flag). In -debug mode, threading is disabled and the job ID of each migrated job is output. -Jobs with empty files will be skipped. Between multiple runs of the tool, the -`job-archive-new` directory must be moved or deleted. - -The `cluster.json` files in `job-archive-new` must be checked for errors, especially -whether the aggregation attribute is set correctly for all metrics. - -Migration takes several hours for relatively large job archives (several hundred -GB). 
A versioned job archive contains a version.txt file in the root directory -of the job archive. This file contains the version as an unsigned integer. diff --git a/docs/dev-authentication.md b/docs/dev-authentication.md deleted file mode 100644 index 4346e97..0000000 --- a/docs/dev-authentication.md +++ /dev/null @@ -1,180 +0,0 @@ -# Overview - -The authentication is implemented in `internal/auth/`. In `auth.go` -an interface is defined that any authentication provider must fulfill. It also -acts as a dispatcher to delegate the calls to the available authentication -providers. - -Two authentication types are available: -* JWT authentication for the REST API that does not create a session cookie -* Session based authentication using a session cookie - -The most important routines in auth are: -* `Login()` Handle POST request to login user and start a new session -* `Auth()` Authenticate user and put User Object in context of the request - -The http router calls auth in the following cases: -* `r.Handle("/login", authentication.Login( ... )).Methods(http.MethodPost)`: - The POST request on the `/login` route will call the Login callback. -* `r.Handle("/jwt-login", authentication.Login( ... ))`: - Any request on the `/jwt-login` route will call the Login callback. Intended - for use for the JWT token based authenticators. -* Any route in the secured subrouter will always call Auth(), on success it will - call the next handler in the chain, on failure it will render the login - template. -``` -secured.Use(func(next http.Handler) http.Handler { - return authentication.Auth( - // On success; - next, - - // On failure: - func(rw http.ResponseWriter, r *http.Request, err error) { - // Render login form - }) -}) -``` - -A JWT token can be used to initiate an authenticated user -session. This can either happen by calling the login route with a token -provided in a header or via a special cookie containing the JWT token. 
-For API routes the access is authenticated on every request using the JWT token -and no session is initiated. - -# Login - -The Login function (located in `auth.go`): -* Extracts the user name and gets the user from the user database table. In case the - user is not found the user object is set to nil. -* Iterates over all authenticators and: - - Calls its `CanLogin` function which checks if the authentication method is - supported for this user. - - Calls its `Login` function to authenticate the user. On success a valid user - object is returned. - - Creates a new session object, stores the user attributes in the session and - saves the session. - - Starts the `onSuccess` http handler - -## Local authenticator - -This authenticator is applied if -``` -return user != nil && user.AuthSource == AuthViaLocalPassword -``` - -Compares the password provided by the login form to the password hash stored in -the user database table: -``` -if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil { - log.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username) - return nil, fmt.Errorf("Authentication failed") -} -``` - -## LDAP authenticator - -This authenticator is applied if the user was found in the database and its -AuthSource is LDAP: -``` -if user != nil { - if user.AuthSource == schema.AuthViaLDAP { - return user, true - } -} -``` - -If the option `SyncUserOnLogin` is set it tried to sync the user from the LDAP -directory. In case this succeeds the user is persisted to the database and can -login. - -Gets the LDAP connection and tries a bind with the provided credentials: -``` -if err := l.Bind(userDn, r.FormValue("password")); err != nil { - log.Errorf("AUTH/LDAP > Authentication for user %s failed: %v", user.Username, err) - return nil, fmt.Errorf("Authentication failed") -} -``` - -## JWT Session authenticator - -Login via JWT token will create a session without password. 
-For login the `X-Auth-Token` header is not supported. This authenticator is -applied if the Authorization header or query parameter login-token is present: -``` - return user, r.Header.Get("Authorization") != "" || - r.URL.Query().Get("login-token") != "" -``` - -The Login function: -* Parses the token and checks if it is expired -* Checks if the signing method is EdDSA or HS256 or HS512 -* Checks if claims are valid and extracts the claims -* The following claims have to be present: - - `sub`: The subject, in this case this is the username - - `exp`: Expiration in Unix epoch time - - `roles`: String array with roles of user -* In case user does not exist in the database and the option `SyncUserOnLogin` - is set add user to user database table with `AuthViaToken` AuthSource. -* Return valid user object - -## JWT Cookie Session authenticator - -Login via JWT cookie token will create a session without password. -It is first checked if the required configuration options are set: -* `trustedIssuer` -* `CookieName` - -and optionally the environment variable `CROSS_LOGIN_JWT_PUBLIC_KEY` is set. - -This authenticator is applied if the configured cookie is present: -``` - jwtCookie, err := r.Cookie(cookieName) - - if err == nil && jwtCookie.Value != "" { - return true - } -``` - -The Login function: -* Extracts and parses the token -* Checks if signing method is Ed25519/EdDSA -* In case publicKeyCrossLogin is configured: - - Check if `iss` issuer claim matches the trusted issuer from configuration - - Return public cross login key - - Otherwise return standard public key -* Checks if claims are valid -* Depending on the option `validateUser` the roles are - extracted from JWT token or taken from user object fetched from database -* Ask browser to delete the JWT cookie -* In case user does not exist in the database and the option `SyncUserOnLogin` - is set add user to user database table with `AuthViaToken` AuthSource.
-* Return valid user object - -# Auth - -The Auth function (located in `auth.go`): -* Returns a new http handler function that is defined right away -* This handler tries two methods to authenticate a user: - - Via a JWT API token in `AuthViaJWT()` - - Via a valid session in `AuthViaSession()` -* If err is nil and the user object is valid it puts the user object in the - request context and starts the onSuccess http handler -* Otherwise it calls the onFailure handler - -## AuthViaJWT - -Implemented in JWTAuthenticator: -* Extract token either from header `X-Auth-Token` or `Authorization` with Bearer - prefix -* Parse token and check if it is valid. The Parse routine will also check if the - token is expired. -* If the option `validateUser` is set it will ensure the - user object exists in the database and takes the roles from the database user -* Otherwise the roles are extracted from the roles claim -* Returns a valid user object with AuthType set to AuthToken - -## AuthViaSession - -* Extracts session -* Get values username, projects, and roles from session -* Returns a valid user object with AuthType set to AuthSession diff --git a/docs/dev-frontend.md b/docs/dev-frontend.md deleted file mode 100644 index f1bffd4..0000000 --- a/docs/dev-frontend.md +++ /dev/null @@ -1,33 +0,0 @@ -## Tips for frontend development - -The frontend assets including the Svelte js files are by default embedded in -the Go binary. To enable a quick turnaround cycle for web development of the -frontend disable embedding of static assets in `config.json`: -``` -"embed-static-files": false, -"static-files": "./web/frontend/public/", - -``` - -Start the node build process (in directory `./web/frontend`) in development mode: -``` -$ npm run dev -``` - -This will start the build process in listen mode. Whenever you change a source -file the dependent javascript targets will be automatically rebuilt.
-In case the javascript files are minified you may need to set the production -flag by hand to false in `./web/frontend/rollup.config.mjs`: -``` -const production = false -``` - -Usually this should work automatically. - -Because the files are still served by ./cc-backend you have to reload the view -explicitly in your browser. - -A common setup is to have three terminals open: -* One running cc-backend (working directory repository root): `./cc-backend -server -dev` -* Another running npm in developer mode (working directory `./web/frontend`): `npm run dev` -* And the last one editing the frontend source files diff --git a/docs/dev-release.md b/docs/dev-release.md deleted file mode 100644 index 30c352f..0000000 --- a/docs/dev-release.md +++ /dev/null @@ -1,13 +0,0 @@ -# Steps to prepare a release - -1. On `hotfix` branch: - * Update ReleaseNotes.md - * Update version in Makefile - * Commit, push, and pull request - * Merge in master - -2. On Linux host: - * Pull master - * Ensure that GitHub Token environment variable `GITHUB_TOKEN` is set - * Create release tag: `git tag v1.1.0 -m release` - * Execute `goreleaser release` diff --git a/docs/dev-testing.md b/docs/dev-testing.md deleted file mode 100644 index 9ca39c3..0000000 --- a/docs/dev-testing.md +++ /dev/null @@ -1,34 +0,0 @@ -## Overview - -We use the standard golang testing environment. - -The following conventions are used: - -* *White box unit tests*: Tests for internal functionality are placed in files -* *Black box unit tests*: Tests for public interfaces are placed in files -with `_test.go` and belong to the package `_test`. -There exists only one package test file per package. -* *Integration tests*: Tests that use multiple components are placed in a -package test file. These are named `_test.go` and belong to the -package `_test`. -* *Test assets*: Any required files are placed in a directory `./testdata` -within each package directory.
- -## Executing tests - -Visual Studio Code has a very good golang test integration. -For debugging a test this is the recommended solution. - -The Makefile provided by us has a `test` target that executes: -``` -$ go clean -testcache -$ go build ./... -$ go vet ./... -$ go test ./... -``` - -Of course the commands can also be used on the command line. -For details about golang testing refer to the standard documentation: - -* [Testing package](https://pkg.go.dev/testing) -* [go test command](https://pkg.go.dev/cmd/go#hdr-Test_packages) diff --git a/docs/migrateTimestamps.pl b/docs/migrateTimestamps.pl deleted file mode 100755 index cc9e2a2..0000000 --- a/docs/migrateTimestamps.pl +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env perl -use strict; -use warnings; -use utf8; - -use JSON::PP; # from Perl default install -use Time::Local qw( timelocal ); # from Perl default install -use Time::Piece; # from Perl default install - -### JSON -my $json = JSON::PP->new->allow_nonref; - -### TIME AND DATE -# now -my $localtime = localtime; -my $epochtime = $localtime->epoch; -# 5 days ago: Via epoch due to possible reverse month borders -my $epochlessfive = $epochtime - (86400 * 5); -my $locallessfive = localtime($epochlessfive); -# Calc like `date --date 'TZ="Europe/Berlin" 0:00 5 days ago' +%s`) -my ($day, $month, $year) = ($locallessfive->mday, $locallessfive->_mon, $locallessfive->year); -my $checkpointStart = timelocal(0, 0, 0, $day, $month, $year); -# for checkpoints -my $halfday = 43200; - -### JOB-ARCHIVE -my $archiveTarget = './cc-backend/var/job-archive'; -my $archiveSrc = './source-data/job-archive-source'; -my @ArchiveClusters; - -# Gen folder -if ( not -d $archiveTarget ){ - mkdir( $archiveTarget ) or die "Couldn't create $archiveTarget directory, $!"; -} - -# Get clusters by job-archive/$subfolder -opendir my $dh, $archiveSrc or die "can't open directory: $!"; -while ( readdir $dh ) { - chomp; next if $_ eq '.' or $_ eq '..' 
or $_ eq 'job-archive'; - my $cluster = $_; - push @ArchiveClusters, $cluster; -} - -# start for jobarchive -foreach my $cluster ( @ArchiveClusters ) { - print "Starting to update start- and stoptimes in job-archive for $cluster\n"; - - my $clusterTarget = "$archiveTarget/$cluster"; - - if ( not -d $clusterTarget ){ - mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!"; - } - - opendir my $dhLevel1, "$archiveSrc/$cluster" or die "can't open directory: $!"; - while ( readdir $dhLevel1 ) { - chomp; next if $_ eq '.' or $_ eq '..'; - my $level1 = $_; - - if ( -d "$archiveSrc/$cluster/$level1" ) { - opendir my $dhLevel2, "$archiveSrc/$cluster/$level1" or die "can't open directory: $!"; - while ( readdir $dhLevel2 ) { - chomp; next if $_ eq '.' or $_ eq '..'; - my $level2 = $_; - my $jobSource = "$archiveSrc/$cluster/$level1/$level2"; - my $jobOrigin = "$jobSource"; - my $jobTargetL1 = "$clusterTarget/$level1"; - my $jobTargetL2 = "$jobTargetL1/$level2"; - - # check if files are directly accessible (old format) else get subfolders as file and update path - if ( ! -e "$jobSource/meta.json") { - opendir(D, "$jobSource") || die "Can't open directory $jobSource: $!\n"; - my @folders = readdir(D); - closedir(D); - if (!@folders) { - next; - } - - foreach my $folder ( @folders ) { - next if $folder eq '.' or $folder eq '..'; - $jobSource = "$jobSource/".$folder; - } - } - # check if subfolder contains file, else skip - if ( ! 
-e "$jobSource/meta.json") { - print "$jobSource skipped\n"; - next; - } - - open my $metafh, '<', "$jobSource/meta.json" or die "Can't open file $!"; - my $rawstr = do { local $/; <$metafh> }; - close($metafh); - my $metadata = $json->decode($rawstr); - - # NOTE Start meta.json iteration here - # my $random_number = int(rand(UPPERLIMIT)) + LOWERLIMIT; - # Set new startTime: Between 5 days and 1 day before now - - # Remove id from attributes - $metadata->{startTime} = $epochtime - (int(rand(432000)) + 86400); - $metadata->{stopTime} = $metadata->{startTime} + $metadata->{duration}; - - # Add starttime subfolder to target path - my $jobTargetL3 = "$jobTargetL2/".$metadata->{startTime}; - - if ( not -d $jobTargetL1 ){ - mkdir( $jobTargetL1 ) or die "Couldn't create $jobTargetL1 directory, $!"; - } - - if ( not -d $jobTargetL2 ){ - mkdir( $jobTargetL2 ) or die "Couldn't create $jobTargetL2 directory, $!"; - } - - # target is not directory - if ( not -d $jobTargetL3 ){ - mkdir( $jobTargetL3 ) or die "Couldn't create $jobTargetL3 directory, $!"; - - my $outstr = $json->encode($metadata); - open my $metaout, '>', "$jobTargetL3/meta.json" or die "Can't write to file $!"; - print $metaout $outstr; - close($metaout); - - open my $datafh, '<', "$jobSource/data.json" or die "Can't open file $!"; - my $datastr = do { local $/; <$datafh> }; - close($datafh); - - open my $dataout, '>', "$jobTargetL3/data.json" or die "Can't write to file $!"; - print $dataout $datastr; - close($dataout); - } - } - } - } -} -print "Done for job-archive\n"; -sleep(1); -exit; - -## CHECKPOINTS -my $checkpTarget = './cc-metric-store/var/checkpoints'; -my $checkpSource = './source-data/cc-metric-store-source/checkpoints'; -my @CheckpClusters; - -# Gen folder -if ( not -d $checkpTarget ){ - mkdir( $checkpTarget ) or die "Couldn't create $checkpTarget directory, $!"; -} - -# Get clusters by cc-metric-store/$subfolder -opendir my $dhc, $checkpSource or die "can't open directory: $!"; -while ( readdir 
$dhc ) { - chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive'; - my $cluster = $_; - push @CheckpClusters, $cluster; -} -closedir($dhc); - -# start for checkpoints -foreach my $cluster ( @CheckpClusters ) { - print "Starting to update checkpoint filenames and data starttimes for $cluster\n"; - - my $clusterTarget = "$checkpTarget/$cluster"; - - if ( not -d $clusterTarget ){ - mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!"; - } - - opendir my $dhLevel1, "$checkpSource/$cluster" or die "can't open directory: $!"; - while ( readdir $dhLevel1 ) { - chomp; next if $_ eq '.' or $_ eq '..'; - # Nodename as level1-folder - my $level1 = $_; - - if ( -d "$checkpSource/$cluster/$level1" ) { - - my $nodeSource = "$checkpSource/$cluster/$level1/"; - my $nodeOrigin = "$nodeSource"; - my $nodeTarget = "$clusterTarget/$level1"; - my @files; - - if ( -e "$nodeSource/1609459200.json") { # 1609459200 == First Checkpoint time in latest dump - opendir(D, "$nodeSource") || die "Can't open directory $nodeSource: $!\n"; - while ( readdir D ) { - chomp; next if $_ eq '.' 
or $_ eq '..'; - my $nodeFile = $_; - push @files, $nodeFile; - } - closedir(D); - my $length = @files; - if (!@files || $length != 14) { # needs 14 files == 7 days worth of data - next; - } - } else { - next; - } - - # sort for integer timestamp-filename-part (moduleless): Guarantees start with index == 0 == 1609459200.json - my @sortedFiles = sort { ($a =~ /^([0-9]{10}).json$/)[0] <=> ($b =~ /^([0-9]{10}).json$/)[0] } @files; - - if ( not -d $nodeTarget ){ - mkdir( $nodeTarget ) or die "Couldn't create $nodeTarget directory, $!"; - - while (my ($index, $file) = each(@sortedFiles)) { - open my $checkfh, '<', "$nodeSource/$file" or die "Can't open file $!"; - my $rawstr = do { local $/; <$checkfh> }; - close($checkfh); - my $checkpdata = $json->decode($rawstr); - - my $newTimestamp = $checkpointStart + ($index * $halfday); - # Get Diff from old Timestamp - my $timeDiff = $newTimestamp - $checkpdata->{from}; - # Set new timestamp - $checkpdata->{from} = $newTimestamp; - - foreach my $metric (keys %{$checkpdata->{metrics}}) { - $checkpdata->{metrics}->{$metric}->{start} += $timeDiff; - } - - my $outstr = $json->encode($checkpdata); - - open my $checkout, '>', "$nodeTarget/$newTimestamp.json" or die "Can't write to file $!"; - print $checkout $outstr; - close($checkout); - } - } - } - } - closedir($dhLevel1); -} -print "Done for checkpoints\n"; diff --git a/docs/searchbar.md b/docs/searchbar.md deleted file mode 100644 index 0ecef53..0000000 --- a/docs/searchbar.md +++ /dev/null @@ -1,36 +0,0 @@ -# Docs for ClusterCockpit Searchbar - -## Usage - -* Searchtags are implemented as `type:` search-string - * Types `jobId, jobName, projectId, username, name, arrayJobId` for roles `admin` and `support` - * `jobName` is jobName as persisted in `job.meta_data` table-column - * `username` is actual account identifier as persisted in `job.user` table-column - * `name` is account owners name as persisted in `user.name` table-column - * Types `jobId, jobName, projectId, 
arrayJobId` for role `user` - * Examples: - * `jobName:myJob12` - * `jobId:123456` - * `username:abcd100` - * `name:Paul` -* If no searchTag used: Best guess search with the following hierarchy - * `jobId -> username -> name -> projectId -> jobName` -* Destinations: - * JobId: Job-Table (Allows multiple identical matches, e.g. JobIds from different clusters) - * JobName: Job-Table (Allows multiple identical matches, e.g. JobNames from different clusters) - * ProjectId: Job-Table - * Username: Users-Table - * **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table. Also, a `Last 30 Days` is active by default and might filter out expected users. - * Name: Users-Table - * **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table. Also, a `Last 30 Days` is active by default and might filter out expected users. - * ArrayJobId: Job-Table (Lists all Jobs of Queried ArrayJobId) - * Best guess search always redirects to Job-Table or `/monitoring/user/$USER` (first username match) - * Unprocessable queries will display messages detailing the cause (Info, Warning, Error) -* Spaces trimmed (both for searchTag and queryString) - * ` job12` == `job12` - * `projectID : abcd ` == `projectId:abcd` -* `jobName`- and `name-`queries work with a part of the target-string - * `jobName:myjob` for jobName "myjob_cluster1" - * `name:Paul` for name "Paul Atreides" - -* JobName GQL Query is resolved as matching the query as a part of the whole metaData-JSON in the SQL DB. 
diff --git a/internal/metricdata/metricdata.go b/internal/metricdata/metricdata.go index 3117f8c..f48b348 100644 --- a/internal/metricdata/metricdata.go +++ b/internal/metricdata/metricdata.go @@ -38,7 +38,6 @@ var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepos var useArchive bool func Init(disableArchive bool) error { - useArchive = !disableArchive for _, cluster := range config.Keys.Clusters { if cluster.MetricDataRepository != nil { @@ -80,7 +79,8 @@ var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024) func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, - ctx context.Context) (schema.JobData, error) { + ctx context.Context, +) (schema.JobData, error) { data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) { var jd schema.JobData var err error @@ -109,7 +109,8 @@ func LoadData(job *schema.Job, jd, err = repo.LoadData(job, metrics, scopes, ctx) if err != nil { if len(jd) != 0 { - log.Warnf("partial error: %s", err.Error()) + log.Errorf("partial error: %s", err.Error()) + return err, 0, 0 } else { log.Error("Error while loading job data from metric repository") return err, 0, 0 @@ -179,8 +180,8 @@ func LoadAverages( job *schema.Job, metrics []string, data [][]schema.Float, - ctx context.Context) error { - + ctx context.Context, +) error { if job.State != schema.JobStateRunning && useArchive { return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here? 
} @@ -219,8 +220,8 @@ func LoadNodeData( metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, - ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) { - + ctx context.Context, +) (map[string]map[string][]*schema.JobMetric, error) { repo, ok := metricDataRepos[cluster] if !ok { return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster) @@ -252,8 +253,8 @@ func LoadNodeData( func cacheKey( job *schema.Job, metrics []string, - scopes []schema.MetricScope) string { - + scopes []schema.MetricScope, +) string { // Duration and StartTime do not need to be in the cache key as StartTime is less unique than // job.ID and the TTL of the cache entry makes sure it does not stay there forever. return fmt.Sprintf("%d(%s):[%v],[%v]", @@ -267,8 +268,8 @@ func cacheKey( func prepareJobData( job *schema.Job, jobData schema.JobData, - scopes []schema.MetricScope) { - + scopes []schema.MetricScope, +) { const maxSeriesSize int = 15 for _, scopes := range jobData { for _, jm := range scopes { @@ -295,7 +296,6 @@ func prepareJobData( // Writes a running job to the job-archive func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) { - allMetrics := make([]string, 0) metricConfigs := archive.GetCluster(job.Cluster).MetricConfig for _, mc := range metricConfigs { diff --git a/web/frontend/src/JobFootprint.svelte b/web/frontend/src/JobFootprint.svelte index e860e04..30034c0 100644 --- a/web/frontend/src/JobFootprint.svelte +++ b/web/frontend/src/JobFootprint.svelte @@ -106,19 +106,19 @@ }) function evalFootprint(metric, mean, thresholds, level) { - // mem_used has inverse logic regarding threshold levels + // mem_used has inverse logic regarding threshold levels, notify levels triggered if mean > threshold switch (level) { case 'peak': - if (metric === 'mem_used') return (mean <= thresholds.peak && mean > thresholds.alert) + if (metric === 'mem_used') return false // mem_used over 
peak -> return false to trigger impact -1 else return (mean <= thresholds.peak && mean > thresholds.normal) case 'alert': - if (metric === 'mem_used') return (mean <= thresholds.alert && mean > thresholds.caution) + if (metric === 'mem_used') return (mean <= thresholds.peak && mean >= thresholds.alert) else return (mean <= thresholds.alert && mean >= 0) case 'caution': - if (metric === 'mem_used') return (mean <= thresholds.caution && mean > thresholds.normal) + if (metric === 'mem_used') return (mean < thresholds.alert && mean >= thresholds.caution) else return (mean <= thresholds.caution && mean > thresholds.alert) case 'normal': - if (metric === 'mem_used') return (mean <= thresholds.normal && mean >= 0) + if (metric === 'mem_used') return (mean < thresholds.caution && mean >= 0) else return (mean <= thresholds.normal && mean > thresholds.caution) default: return false diff --git a/web/frontend/src/NavbarTools.svelte b/web/frontend/src/NavbarTools.svelte index 2bf9aca..f6ded90 100644 --- a/web/frontend/src/NavbarTools.svelte +++ b/web/frontend/src/NavbarTools.svelte @@ -30,7 +30,7 @@ style="margin-left: 10px;" /> - + + + + + + + + {#if username}
@@ -50,43 +67,29 @@ color="success" type="submit" style="margin-left: 10px;" + title="Logout {username}" > {#if screenSize > 1630} - Logout {username} + Logout {username} {:else} - + {/if}
{/if} - - - {:else} - - -
- - +
- + + +
+ +
+
diff --git a/web/frontend/src/joblist/Row.svelte b/web/frontend/src/joblist/Row.svelte index 71bc805..4d9013c 100644 --- a/web/frontend/src/joblist/Row.svelte +++ b/web/frontend/src/joblist/Row.svelte @@ -163,6 +163,7 @@ subCluster={job.subCluster} isShared={(job.exclusive != 1)} resources={job.resources} + hwthreads={job.numHWThreads} /> {:else if metric.disabled == true && metric.data} Metric disabled for subcluster {metric.data.name}:{job.subCluster} diff --git a/web/frontend/src/plots/MetricPlot.svelte b/web/frontend/src/plots/MetricPlot.svelte index 8d7825d..7bd264c 100644 --- a/web/frontend/src/plots/MetricPlot.svelte +++ b/web/frontend/src/plots/MetricPlot.svelte @@ -39,6 +39,7 @@ export let subCluster export let isShared = false export let forNode = false + export let hwthreads = 0 if (useStatsSeries == null) useStatsSeries = statisticsSeries != null @@ -53,7 +54,7 @@ const lineWidth = clusterCockpitConfig.plot_general_lineWidth / window.devicePixelRatio const lineColors = clusterCockpitConfig.plot_general_colorscheme const backgroundColors = { normal: 'rgba(255, 255, 255, 1.0)', caution: 'rgba(255, 128, 0, 0.3)', alert: 'rgba(255, 0, 0, 0.3)' } - const thresholds = findThresholds(metricConfig, scope, typeof subCluster == 'string' ? cluster.subClusters.find(sc => sc.name == subCluster) : subCluster) + const thresholds = findThresholds(metricConfig, scope, typeof subCluster == 'string' ? 
cluster.subClusters.find(sc => sc.name == subCluster) : subCluster, isShared, hwthreads) // converts the legend into a simple tooltip function legendAsTooltipPlugin({ className, style = { backgroundColor:"rgba(255, 249, 196, 0.92)", color: "black" } } = {}) { @@ -380,14 +381,14 @@ } } - export function findThresholds(metricConfig, scope, subCluster) { + export function findThresholds(metricConfig, scope, subCluster, isShared, hwthreads) { // console.log('NAME ' + metricConfig.name + ' / SCOPE ' + scope + ' / SUBCLUSTER ' + subCluster.name) if (!metricConfig || !scope || !subCluster) { console.warn('Argument missing for findThresholds!') return null } - if (scope == 'node' || metricConfig.aggregation == 'avg') { + if ((scope == 'node' && isShared == false) || metricConfig.aggregation == 'avg') { if (metricConfig.subClusters && metricConfig.subClusters.length === 0) { // console.log('subClusterConfigs array empty, use metricConfig defaults') return { normal: metricConfig.normal, caution: metricConfig.caution, alert: metricConfig.alert, peak: metricConfig.peak } @@ -408,7 +409,9 @@ } let divisor = 1 - if (scope == 'socket') + if (isShared == true && hwthreads > 0) { // Shared + divisor = subCluster.topology.node.length / hwthreads + } else if (scope == 'socket') divisor = subCluster.topology.socket.length else if (scope == 'core') divisor = subCluster.topology.core.length