mirror of https://github.com/ClusterCockpit/cc-backend
synced 2025-07-01 19:23:50 +02:00

Merge branch 'dev' into 134-job-tagging

This commit is contained in commit 13386175f5.

.github/workflows/Release.yml (vendored) | 331
@@ -1,331 +0,0 @@
# See: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions

# Workflow name
name: Release

# Run on tag push
on:
  push:
    tags:
    - '**'

jobs:

  #
  # Build on AlmaLinux 8.5 using golang-1.18.2
  #
  AlmaLinux-RPM-build:
    runs-on: ubuntu-latest
    # See: https://hub.docker.com/_/almalinux
    container: almalinux:8.5
    # The job outputs link to the outputs of the 'rpmrename' step
    # Only job outputs can be used in child jobs
    outputs:
      rpm : ${{steps.rpmrename.outputs.RPM}}
      srpm : ${{steps.rpmrename.outputs.SRPM}}
    steps:

      # Use dnf to install development packages
      - name: Install development packages
        run: |
          dnf --assumeyes group install "Development Tools" "RPM Development Tools"
          dnf --assumeyes install wget openssl-devel diffutils delve which npm
          dnf --assumeyes install 'dnf-command(builddep)'

      # Checkout git repository and submodules
      # fetch-depth must be 0 to use git describe
      # See: https://github.com/marketplace/actions/checkout
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive
          fetch-depth: 0

      # Use dnf to install build dependencies
      - name: Install build dependencies
        run: |
          wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
          rpm -i go*.rpm
          npm install --global yarn rollup svelte rollup-plugin-svelte
          #dnf --assumeyes builddep build/package/cc-backend.spec

      - name: RPM build ClusterCockpit
        id: rpmbuild
        run: make RPM

      # AlmaLinux 8.5 is a derivative of RedHat Enterprise Linux 8 (UBI8),
      # so the created RPMs both contain the substring 'el8' in their file names.
      # This step replaces the substring 'el8' with 'alma85'. It uses the move operation
      # because it is unclear whether the default AlmaLinux 8.5 container contains the
      # 'rename' command. This way we also get the new names for output.
      - name: Rename RPMs (s/el8/alma85/)
        id: rpmrename
        run: |
          OLD_RPM="${{steps.rpmbuild.outputs.RPM}}"
          OLD_SRPM="${{steps.rpmbuild.outputs.SRPM}}"
          NEW_RPM="${OLD_RPM/el8/alma85}"
          NEW_SRPM=${OLD_SRPM/el8/alma85}
          mv "${OLD_RPM}" "${NEW_RPM}"
          mv "${OLD_SRPM}" "${NEW_SRPM}"
          echo "::set-output name=SRPM::${NEW_SRPM}"
          echo "::set-output name=RPM::${NEW_RPM}"

      # See: https://github.com/actions/upload-artifact
      - name: Save RPM as artifact
        uses: actions/upload-artifact@v2
        with:
          name: cc-backend RPM for AlmaLinux 8.5
          path: ${{ steps.rpmrename.outputs.RPM }}
      - name: Save SRPM as artifact
        uses: actions/upload-artifact@v2
        with:
          name: cc-backend SRPM for AlmaLinux 8.5
          path: ${{ steps.rpmrename.outputs.SRPM }}

  #
  # Build on UBI 8 using golang-1.18.2
  #
  UBI-8-RPM-build:
    runs-on: ubuntu-latest
    # See: https://catalog.redhat.com/software/containers/ubi8/ubi/5c359854d70cc534b3a3784e?container-tabs=gti
    container: registry.access.redhat.com/ubi8/ubi:8.5-226.1645809065
    # The job outputs link to the outputs of the 'rpmbuild' step
    outputs:
      rpm : ${{steps.rpmbuild.outputs.RPM}}
      srpm : ${{steps.rpmbuild.outputs.SRPM}}
    steps:

      # Use dnf to install development packages
      - name: Install development packages
        run: dnf --assumeyes --disableplugin=subscription-manager install rpm-build go-srpm-macros rpm-build-libs rpm-libs gcc make python38 git wget openssl-devel diffutils delve which

      # Checkout git repository and submodules
      # fetch-depth must be 0 to use git describe
      # See: https://github.com/marketplace/actions/checkout
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive
          fetch-depth: 0

      # Use dnf to install build dependencies
      - name: Install build dependencies
        run: |
          wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
          rpm -i go*.rpm
          dnf --assumeyes --disableplugin=subscription-manager install npm
          npm install --global yarn rollup svelte rollup-plugin-svelte
          #dnf --assumeyes builddep build/package/cc-backend.spec

      - name: RPM build ClusterCockpit
        id: rpmbuild
        run: make RPM

      # See: https://github.com/actions/upload-artifact
      - name: Save RPM as artifact
        uses: actions/upload-artifact@v2
        with:
          name: cc-backend RPM for UBI 8
          path: ${{ steps.rpmbuild.outputs.RPM }}
      - name: Save SRPM as artifact
        uses: actions/upload-artifact@v2
        with:
          name: cc-backend SRPM for UBI 8
          path: ${{ steps.rpmbuild.outputs.SRPM }}

  #
  # Build on Ubuntu 20.04 using official go 1.19.1 package
  #
  Ubuntu-focal-build:
    runs-on: ubuntu-latest
    container: ubuntu:20.04
    # The job outputs link to the outputs of the 'debrename' step
    # Only job outputs can be used in child jobs
    outputs:
      deb : ${{steps.debrename.outputs.DEB}}
    steps:
      # Use apt to install development packages
      - name: Install development packages
        run: |
          apt update && apt --assume-yes upgrade
          apt --assume-yes install build-essential sed git wget bash
          apt --assume-yes install npm
          npm install --global yarn rollup svelte rollup-plugin-svelte
      # Checkout git repository and submodules
      # fetch-depth must be 0 to use git describe
      # See: https://github.com/marketplace/actions/checkout
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive
          fetch-depth: 0
      # Use official golang package
      - name: Install Golang
        run: |
          wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
          tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
          export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
          go version
      - name: DEB build ClusterCockpit
        id: dpkg-build
        run: |
          ls -la
          pwd
          env
          export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
          git config --global --add safe.directory $(pwd)
          make DEB
      - name: Rename DEB (add '_ubuntu20.04')
        id: debrename
        run: |
          OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
          NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu20.04.deb"
          mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
          echo "::set-output name=DEB::${NEW_DEB_FILE}"
      # See: https://github.com/actions/upload-artifact
      - name: Save DEB as artifact
        uses: actions/upload-artifact@v2
        with:
          name: cc-backend DEB for Ubuntu 20.04
          path: ${{ steps.debrename.outputs.DEB }}

  #
  # Build on Ubuntu 22.04 using official go 1.19.1 package
  #
  Ubuntu-jammy-build:
    runs-on: ubuntu-latest
    container: ubuntu:22.04
    # The job outputs link to the outputs of the 'debrename' step
    # Only job outputs can be used in child jobs
    outputs:
      deb : ${{steps.debrename.outputs.DEB}}
    steps:
      # Use apt to install development packages
      - name: Install development packages
        run: |
          apt update && apt --assume-yes upgrade
          apt --assume-yes install build-essential sed git wget bash npm
          npm install --global yarn rollup svelte rollup-plugin-svelte
      # Checkout git repository and submodules
      # fetch-depth must be 0 to use git describe
      # See: https://github.com/marketplace/actions/checkout
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive
          fetch-depth: 0
      # Use official golang package
      - name: Install Golang
        run: |
          wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
          tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
          export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
          go version
      - name: DEB build ClusterCockpit
        id: dpkg-build
        run: |
          ls -la
          pwd
          env
          export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
          git config --global --add safe.directory $(pwd)
          make DEB
      - name: Rename DEB (add '_ubuntu22.04')
        id: debrename
        run: |
          OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
          NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu22.04.deb"
          mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
          echo "::set-output name=DEB::${NEW_DEB_FILE}"
      # See: https://github.com/actions/upload-artifact
      - name: Save DEB as artifact
        uses: actions/upload-artifact@v2
        with:
          name: cc-backend DEB for Ubuntu 22.04
          path: ${{ steps.debrename.outputs.DEB }}

  #
  # Create release with fresh RPMs
  #
  Release:
    runs-on: ubuntu-latest
    # We need the RPMs, so add dependency
    needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build, Ubuntu-jammy-build]

    steps:
      # See: https://github.com/actions/download-artifact
      - name: Download AlmaLinux 8.5 RPM
        uses: actions/download-artifact@v2
        with:
          name: cc-backend RPM for AlmaLinux 8.5
      - name: Download AlmaLinux 8.5 SRPM
        uses: actions/download-artifact@v2
        with:
          name: cc-backend SRPM for AlmaLinux 8.5

      - name: Download UBI 8 RPM
        uses: actions/download-artifact@v2
        with:
          name: cc-backend RPM for UBI 8
      - name: Download UBI 8 SRPM
        uses: actions/download-artifact@v2
        with:
          name: cc-backend SRPM for UBI 8

      - name: Download Ubuntu 20.04 DEB
        uses: actions/download-artifact@v2
        with:
          name: cc-backend DEB for Ubuntu 20.04

      - name: Download Ubuntu 22.04 DEB
        uses: actions/download-artifact@v2
        with:
          name: cc-backend DEB for Ubuntu 22.04

      # The download actions do not publish the name of the downloaded file,
      # so we re-use the job outputs of the parent jobs. The files are all
      # downloaded to the current folder.
      # The gh-release action afterwards does not accept file lists; all
      # files have to be listed under 'files'. The step creates one output per
      # RPM package (2 per distro).
      - name: Set RPM variables
        id: files
        run: |
          ALMA_85_RPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.rpm}}")
          ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm}}")
          UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm}}")
          UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm}}")
          U_2004_DEB=$(basename "${{ needs.Ubuntu-focal-build.outputs.deb}}")
          U_2204_DEB=$(basename "${{ needs.Ubuntu-jammy-build.outputs.deb}}")
          echo "ALMA_85_RPM::${ALMA_85_RPM}"
          echo "ALMA_85_SRPM::${ALMA_85_SRPM}"
          echo "UBI_8_RPM::${UBI_8_RPM}"
          echo "UBI_8_SRPM::${UBI_8_SRPM}"
          echo "U_2004_DEB::${U_2004_DEB}"
          echo "U_2204_DEB::${U_2204_DEB}"
          echo "::set-output name=ALMA_85_RPM::${ALMA_85_RPM}"
          echo "::set-output name=ALMA_85_SRPM::${ALMA_85_SRPM}"
          echo "::set-output name=UBI_8_RPM::${UBI_8_RPM}"
          echo "::set-output name=UBI_8_SRPM::${UBI_8_SRPM}"
          echo "::set-output name=U_2004_DEB::${U_2004_DEB}"
          echo "::set-output name=U_2204_DEB::${U_2204_DEB}"

      # See: https://github.com/softprops/action-gh-release
      - name: Release
        uses: softprops/action-gh-release@v1
        if: startsWith(github.ref, 'refs/tags/')
        with:
          name: cc-backend-${{github.ref_name}}
          files: |
            ${{ steps.files.outputs.ALMA_85_RPM }}
            ${{ steps.files.outputs.ALMA_85_SRPM }}
            ${{ steps.files.outputs.UBI_8_RPM }}
            ${{ steps.files.outputs.UBI_8_SRPM }}
            ${{ steps.files.outputs.U_2004_DEB }}
            ${{ steps.files.outputs.U_2204_DEB }}
.github/workflows/test.yml (vendored) | 2
@@ -7,7 +7,7 @@ jobs:
     - name: Install Go
       uses: actions/setup-go@v4
       with:
-        go-version: 1.20.x
+        go-version: 1.24.x
     - name: Checkout code
       uses: actions/checkout@v3
     - name: Build, Vet & Test
.gitignore (vendored) | 20
@@ -1,19 +1,23 @@
 /cc-backend
 
-/var/job-archive
-/var/*.db
-/var/machine-state
-
 /.env
 /config.json
 
+/var/job-archive
+/var/machine-state
+/var/job.db-shm
+/var/job.db-wal
+/var/*.db
+/var/*.txt
+
 /web/frontend/public/build
 /web/frontend/node_modules
-/.vscode/*
 
 /archive-migration
 /archive-manager
-var/job.db-shm
-var/job.db-wal
 
 /internal/repository/testdata/job.db-shm
 /internal/repository/testdata/job.db-wal
 
+/.vscode/*
+dist/
+*.db
@@ -34,19 +34,6 @@ builds:
     main: ./tools/archive-manager
     tags:
     - static_build
-  - env:
-    - CGO_ENABLED=0
-    goos:
-    - linux
-    goarch:
-    - amd64
-    goamd64:
-    - v3
-    id: "archive-migration"
-    binary: archive-migration
-    main: ./tools/archive-migration
-    tags:
-    - static_build
   - env:
     - CGO_ENABLED=0
     goos:
@@ -70,7 +57,7 @@ archives:
       {{- else }}{{ .Arch }}{{ end }}
       {{- if .Arm }}v{{ .Arm }}{{ end }}
 checksum:
-  name_template: 'checksums.txt'
+  name_template: "checksums.txt"
 snapshot:
   name_template: "{{ incpatch .Version }}-next"
 changelog:
@@ -100,7 +87,7 @@ changelog:
 release:
   draft: false
   footer: |
-    Supports job archive version 1 and database version 6.
+    Supports job archive version 2 and database version 8.
     Please check out the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for further details on breaking changes.
 
 # vim: set ts=2 sw=2 tw=0 fo=cnqoj
Makefile | 35
@@ -2,7 +2,7 @@ TARGET = ./cc-backend
 VAR = ./var
 CFG = config.json .env
 FRONTEND = ./web/frontend
-VERSION = 1.3.0
+VERSION = 1.4.4
 GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
 CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
 LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
@@ -22,13 +22,23 @@ SVELTE_COMPONENTS = status \
     header
 
 SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
-SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
-    $(wildcard $(FRONTEND)/src/*.js) \
-    $(wildcard $(FRONTEND)/src/filters/*.svelte) \
-    $(wildcard $(FRONTEND)/src/plots/*.svelte) \
-    $(wildcard $(FRONTEND)/src/joblist/*.svelte)
+SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
+    $(wildcard $(FRONTEND)/src/*.js) \
+    $(wildcard $(FRONTEND)/src/analysis/*.svelte) \
+    $(wildcard $(FRONTEND)/src/config/*.svelte) \
+    $(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
+    $(wildcard $(FRONTEND)/src/config/user/*.svelte) \
+    $(wildcard $(FRONTEND)/src/generic/*.js) \
+    $(wildcard $(FRONTEND)/src/generic/*.svelte) \
+    $(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
+    $(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
+    $(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
+    $(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
+    $(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
+    $(wildcard $(FRONTEND)/src/header/*.svelte) \
+    $(wildcard $(FRONTEND)/src/job/*.svelte)
 
-.PHONY: clean distclean test tags frontend $(TARGET)
+.PHONY: clean distclean test tags frontend swagger graphql $(TARGET)
 
 .NOTPARALLEL:
@@ -40,6 +50,15 @@ frontend:
     $(info ===> BUILD frontend)
     cd web/frontend && npm install && npm run build
 
+swagger:
+    $(info ===> GENERATE swagger)
+    @go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api
+    @mv ./api/docs.go ./internal/api/docs.go
+
+graphql:
+    $(info ===> GENERATE graphql)
+    @go run github.com/99designs/gqlgen
+
 clean:
     $(info ===> CLEAN)
     @go clean
@@ -63,7 +82,7 @@ tags:
     @ctags -R
 
 $(VAR):
-    @mkdir $(VAR)
+    @mkdir -p $(VAR)
 
 config.json:
     $(info ===> Initialize config.json file)
@@ -65,7 +65,7 @@ cd ./cc-backend
 ./startDemo.sh
 ```
 
-You can also try the demo using the lates release binary.
+You can also try the demo using the latest release binary.
 Create a folder and put the release binary `cc-backend` into this folder.
 Execute the following steps:
 
@@ -88,7 +88,9 @@ Analysis, Systems and Status views).
 There is a Makefile to automate the build of cc-backend. The Makefile supports
 the following targets:
 
-* `make`: Initialize `var` directory and build svelte frontend and backend binary. Note that there is no proper prerequesite handling. Any change of frontend source files will result in a complete rebuild.
+* `make`: Initialize `var` directory and build svelte frontend and backend
+  binary. Note that there is no proper prerequisite handling. Any change of
+  frontend source files will result in a complete rebuild.
 * `make clean`: Clean go build cache and remove binary.
 * `make test`: Run the tests that are also run in the GitHub workflow setup.
 
@@ -147,8 +149,6 @@ contains Go packages that can be used by other projects.
 Additional command line helper tools.
 * [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
   Commands for getting infos about an existing job archive.
-* [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration)
-  Tool to migrate from previous to current job archive version.
 * [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
   Tool to convert external pubkey for use in `cc-backend`.
 * [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
@@ -1,12 +1,47 @@
-# `cc-backend` version 1.3.0
+# `cc-backend` version 1.4.4
 
-Supports job archive version 1 and database version 7.
+Supports job archive version 2 and database version 8.
 
-This is a minor release of `cc-backend`, the API backend and frontend
+This is a bug fix release of `cc-backend`, the API backend and frontend
 implementation of ClusterCockpit.
 For release specific notes visit the [ClusterCockpit Documentation](https://clustercockpit.org/docs/release/).
 
 ## Breaking changes
 
+* This release fixes bugs in the MySQL/MariaDB database schema. For this reason
+  you have to migrate your database using the `-migrate-db` switch.
+
+The option `apiAllowedIPs` is now a required configuration attribute in
+`config.json`. This option restricts access to the admin API.
+
+To retain the previous behavior that the API is per default accessible from
+everywhere set:
+
+```json
+"apiAllowedIPs": [
+  "*"
+]
+```
+
+## Breaking changes for minor release 1.4.x
+
+- You need to perform a database migration. Depending on your database size the
+  migration might require several hours!
+- You need to adapt the `cluster.json` configuration files in the job-archive,
+  add new required attributes to the metric list and after that edit
+  `./job-archive/version.txt` to version 2. Only metrics that have the footprint
+  attribute set can be filtered and show up in the footprint UI and polar plot.
+- Continuous scrolling is default now in all job lists. You can change this back
+  to paging globally, also every user can configure to use paging or continuous
+  scrolling individually.
+- Tags have a scope now. Existing tags will get global scope in the database
+  migration.
+
+## New features
+
+- Enable to delete tags from the web interface
+
+## Known issues
+
+- Currently energy footprint metrics of type energy are ignored for calculating
+  total energy.
+- Resampling for running jobs only works with cc-metric-store.
+- With energy footprint metrics of type power the unit is ignored and it is
+  assumed the metric has the unit Watt.
@@ -18,6 +18,7 @@ type Job {
   numNodes: Int!
   numHWThreads: Int!
   numAcc: Int!
+  energy: Float!
   SMT: Int!
   exclusive: Int!
   partition: String!
@@ -27,12 +28,8 @@ type Job {
   tags: [Tag!]!
   resources: [Resource!]!
   concurrentJobs: JobLinkResultList
 
-  memUsedMax: Float
-  flopsAnyAvg: Float
-  memBwAvg: Float
-  loadAvg: Float
-
+  footprint: [FootprintValue]
+  energyFootprint: [EnergyFootprintValue]
   metaData: Any
   userData: User
 }
@@ -45,7 +42,6 @@ type JobLink {
 type Cluster {
   name: String!
   partitions: [String!]! # Slurm partitions
-  metricConfig: [MetricConfig!]!
   subClusters: [SubCluster!]! # Hardware partitions/subclusters
 }
@@ -61,9 +57,24 @@ type SubCluster {
   flopRateSimd: MetricValue!
   memoryBandwidth: MetricValue!
   topology: Topology!
+  metricConfig: [MetricConfig!]!
+  footprint: [String!]!
 }
 
+type FootprintValue {
+  name: String!
+  stat: String!
+  value: Float!
+}
+
+type EnergyFootprintValue {
+  hardware: String!
+  metric: String!
+  value: Float!
+}
+
 type MetricValue {
+  name: String
   unit: Unit!
   value: Float!
 }
@@ -102,6 +113,7 @@ type MetricConfig {
   normal: Float
   caution: Float!
   alert: Float!
+  lowerIsBetter: Boolean
   subClusters: [SubClusterConfig!]!
 }
@@ -109,6 +121,7 @@ type Tag {
   id: ID!
   type: String!
   name: String!
+  scope: String!
 }
 
 type Resource {
@@ -138,6 +151,43 @@ type Series {
   data: [NullableFloat!]!
 }
 
+type StatsSeries {
+  mean: [NullableFloat!]!
+  median: [NullableFloat!]!
+  min: [NullableFloat!]!
+  max: [NullableFloat!]!
+}
+
+type NamedStatsWithScope {
+  name: String!
+  scope: MetricScope!
+  stats: [ScopedStats!]!
+}
+
+type ScopedStats {
+  hostname: String!
+  id: String
+  data: MetricStatistics!
+}
+
+type JobStats {
+  id: Int!
+  jobId: String!
+  startTime: Int!
+  duration: Int!
+  cluster: String!
+  subCluster: String!
+  numNodes: Int!
+  numHWThreads: Int
+  numAccelerators: Int
+  stats: [NamedStats!]!
+}
+
+type NamedStats {
+  name: String!
+  data: MetricStatistics!
+}
+
 type Unit {
   base: String!
   prefix: String
@@ -149,12 +199,6 @@ type MetricStatistics {
   max: Float!
 }
 
-type StatsSeries {
-  mean: [NullableFloat!]!
-  min: [NullableFloat!]!
-  max: [NullableFloat!]!
-}
-
 type MetricFootprints {
   metric: String!
   data: [NullableFloat!]!
@@ -180,6 +224,28 @@ type NodeMetrics {
   metrics: [JobMetricWithName!]!
 }
 
+type NodesResultList {
+  items: [NodeMetrics!]!
+  offset: Int
+  limit: Int
+  count: Int
+  totalNodes: Int
+  hasNextPage: Boolean
+}
+
+type ClusterSupport {
+  cluster: String!
+  subClusters: [String!]!
+}
+
+type GlobalMetricListItem {
+  name: String!
+  unit: Unit!
+  scope: MetricScope!
+  footprint: String
+  availability: [ClusterSupport!]!
+}
+
 type Count {
   name: String!
   count: Int!
@@ -191,39 +257,51 @@ type User {
   email: String!
 }
 
+input MetricStatItem {
+  metricName: String!
+  range: FloatRange!
+}
+
 type Query {
   clusters: [Cluster!]! # List of all clusters
   tags: [Tag!]! # List of all tags
+  globalMetrics: [GlobalMetricListItem!]!
 
   user(username: String!): User
   allocatedNodes(cluster: String!): [Count!]!
 
   job(id: ID!): Job
-  jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
-  jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
+  jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
+  jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
+  scopedJobStats(id: ID!, metrics: [String!], scopes: [MetricScope!]): [NamedStatsWithScope!]!
 
   jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
-  jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate): [JobsStatistics!]!
+  jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate, numDurationBins: String, numMetricBins: Int): [JobsStatistics!]!
+  jobsMetricStats(filter: [JobFilter!], metrics: [String!]): [JobStats!]!
+  jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
 
   rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
 
   nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
+  nodeMetricsList(cluster: String!, subCluster: String!, nodeFilter: String!, scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!, page: PageRequest, resolution: Int): NodesResultList!
 }
 
 type Mutation {
-  createTag(type: String!, name: String!): Tag!
+  createTag(type: String!, name: String!, scope: String!): Tag!
   deleteTag(id: ID!): ID!
   addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
   removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
+  removeTagFromList(tagIds: [ID!]!): [Int!]!
 
   updateConfiguration(name: String!, value: String!): String
 }
 
 type IntRangeOutput { from: Int!, to: Int! }
-type TimeRangeOutput { from: Time!, to: Time! }
+type TimeRangeOutput { range: String, from: Time!, to: Time! }
 
 input JobFilter {
   tags: [ID!]
+  dbId: [ID!]
   jobId: StringInput
   arrayJobId: Int
   user: StringInput
@@ -232,6 +310,7 @@ input JobFilter {
   cluster: StringInput
   partition: StringInput
   duration: IntRange
+  energy: FloatRange
 
   minRunningFor: Int
 
@@ -241,17 +320,14 @@ input JobFilter {
 
   startTime: TimeRange
   state: [JobState!]
-  flopsAnyAvg: FloatRange
-  memBwAvg: FloatRange
-  loadAvg: FloatRange
-  memUsedMax: FloatRange
-
+  metricStats: [MetricStatItem!]
   exclusive: Int
   node: StringInput
 }
 
 input OrderByInput {
   field: String!
+  type: String!,
   order: SortDirectionEnum! = ASC
 }
@@ -269,9 +345,13 @@ input StringInput {
   in: [String!]
 }
 
 input IntRange { from: Int!, to: Int! }
-input FloatRange { from: Float!, to: Float! }
-input TimeRange { from: Time, to: Time }
+input TimeRange { range: String, from: Time, to: Time }
+
+input FloatRange {
+  from: Float!
+  to: Float!
+}
 
 type JobResultList {
   items: [Job!]!
@@ -295,6 +375,7 @@ type HistoPoint {
 type MetricHistoPoints {
   metric: String!
+  unit: String!
   stat: String
   data: [MetricHistoPoint!]
 }
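The schema above introduces a `jobStats` query that returns per-metric statistics for a single job. A minimal Go sketch of calling it (not part of this commit; the `/query` endpoint path, the bearer token, the job ID, and the metric names are illustrative assumptions):

// Sketch: query the new jobStats field over HTTP, assuming a GraphQL
// endpoint at /query and JWT bearer authentication.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// GraphQL request body: per-metric min/avg/max for one job.
	payload, _ := json.Marshal(map[string]any{
		"query": `query($id: ID!) {
			jobStats(id: $id, metrics: ["flops_any", "mem_bw"]) {
				name
				data { min avg max }
			}
		}`,
		"variables": map[string]string{"id": "1234"}, // hypothetical job ID
	})

	req, _ := http.NewRequest("POST", "http://localhost:8080/query", bytes.NewReader(payload))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <JWT>") // placeholder token

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // inspect the JSON body for the stats payload
}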
api/swagger.json | 620 (file diff suppressed because it is too large)
api/swagger.yaml | 471
@@ -1,4 +1,3 @@
-basePath: /api
 definitions:
   api.ApiReturnedUser:
     properties:
@@ -23,11 +22,20 @@ definitions:
         description: Tag Name
         example: Testjob
         type: string
+      scope:
+        description: Tag Scope for Frontend Display
+        example: global
+        type: string
       type:
         description: Tag Type
         example: Debug
         type: string
     type: object
+  api.DefaultJobApiResponse:
+    properties:
+      msg:
+        type: string
+    type: object
   api.DeleteJobApiRequest:
     properties:
       cluster:
@@ -45,11 +53,6 @@ definitions:
     required:
     - jobId
     type: object
-  api.DeleteJobApiResponse:
-    properties:
-      msg:
-        type: string
-    type: object
   api.EditMetaRequest:
     properties:
       key:
@@ -108,33 +111,22 @@ definitions:
       scope:
        $ref: '#/definitions/schema.MetricScope'
     type: object
-  api.StartJobApiResponse:
-    properties:
-      id:
-        description: Database ID of new job
-        type: integer
-    type: object
   api.StopJobApiRequest:
     properties:
       cluster:
         description: Cluster of job
         example: fritz
         type: string
       jobId:
         description: Cluster Job ID of job
         example: 123000
         type: integer
       jobState:
         allOf:
         - $ref: '#/definitions/schema.JobState'
         description: Final job state
         example: completed
-      startTime:
-        description: Start Time of job as epoch
-        example: 1649723812
-        type: integer
       stopTime:
         description: Stop Time of job as epoch
         example: 1649763839
         type: integer
     required:
@@ -167,42 +159,40 @@ definitions:
     description: Information of a HPC job.
     properties:
       arrayJobId:
         description: The unique identifier of an array job
         example: 123000
         type: integer
       cluster:
         description: The unique identifier of a cluster
         example: fritz
         type: string
       concurrentJobs:
         $ref: '#/definitions/schema.JobLinkResultList'
       duration:
         description: Duration of job in seconds (Min > 0)
         example: 43200
         minimum: 1
         type: integer
+      energy:
+        type: number
+      energyFootprint:
+        additionalProperties:
+          type: number
+        type: object
       exclusive:
         description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs
           of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple
           jobs of same user'
         example: 1
         maximum: 2
         minimum: 0
         type: integer
-      flopsAnyAvg:
-        description: FlopsAnyAvg as Float64
-        type: number
+      footprint:
+        additionalProperties:
+          type: number
+        type: object
       id:
         description: The unique identifier of a job in the database
         type: integer
       jobId:
         description: The unique identifier of a job
         example: 123000
         type: integer
       jobState:
         allOf:
         - $ref: '#/definitions/schema.JobState'
         description: Final state of job
         enum:
         - completed
         - failed
@@ -211,79 +201,53 @@ definitions:
         - timeout
         - out_of_memory
         example: completed
-      loadAvg:
-        description: LoadAvg as Float64
-        type: number
-      memBwAvg:
-        description: MemBwAvg as Float64
-        type: number
-      memUsedMax:
-        description: MemUsedMax as Float64
-        type: number
       metaData:
         additionalProperties:
           type: string
         description: Additional information about the job
         type: object
       monitoringStatus:
         description: 'State of monitoring system during job run: 0 - Disabled, 1 -
           Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull'
         example: 1
         maximum: 3
         minimum: 0
         type: integer
       numAcc:
         description: Number of accelerators used (Min > 0)
         example: 2
         minimum: 1
         type: integer
       numHwthreads:
         description: NumCores int32 `json:"numCores" db:"num_cores" example:"20" minimum:"1"` //
           Number of HWThreads used (Min > 0)
         example: 20
         minimum: 1
         type: integer
       numNodes:
         description: Number of nodes used (Min > 0)
         example: 2
         minimum: 1
         type: integer
       partition:
         description: The Slurm partition to which the job was submitted
         example: main
         type: string
       project:
         description: The unique identifier of a project
         example: abcd200
         type: string
       resources:
         description: Resources used by job
         items:
           $ref: '#/definitions/schema.Resource'
         type: array
       smt:
         description: SMT threads used by job
         example: 4
         type: integer
       startTime:
         description: Start time as 'time.Time' data type
         type: string
       subCluster:
         description: The unique identifier of a sub cluster
         example: main
         type: string
       tags:
         description: List of tags
         items:
           $ref: '#/definitions/schema.Tag'
         type: array
       user:
         description: The unique identifier of a user
         example: abcd100h
         type: string
       walltime:
         description: Requested walltime of job in seconds (Min > 0)
         example: 86400
         minimum: 1
         type: integer
@@ -308,39 +272,40 @@ definitions:
     description: Meta data information of a HPC job.
     properties:
       arrayJobId:
         description: The unique identifier of an array job
         example: 123000
         type: integer
       cluster:
         description: The unique identifier of a cluster
         example: fritz
         type: string
       concurrentJobs:
         $ref: '#/definitions/schema.JobLinkResultList'
       duration:
         description: Duration of job in seconds (Min > 0)
         example: 43200
         minimum: 1
         type: integer
+      energy:
+        type: number
+      energyFootprint:
+        additionalProperties:
+          type: number
+        type: object
       exclusive:
         description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs
           of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple
           jobs of same user'
         example: 1
         maximum: 2
         minimum: 0
         type: integer
+      footprint:
+        additionalProperties:
+          type: number
+        type: object
       id:
         description: The unique identifier of a job in the database
         type: integer
       jobId:
         description: The unique identifier of a job
         example: 123000
         type: integer
       jobState:
         allOf:
         - $ref: '#/definitions/schema.JobState'
         description: Final state of job
         enum:
         - completed
         - failed
@@ -352,74 +317,56 @@ definitions:
       metaData:
         additionalProperties:
           type: string
         description: Additional information about the job
         type: object
       monitoringStatus:
         description: 'State of monitoring system during job run: 0 - Disabled, 1 -
           Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull'
         example: 1
         maximum: 3
         minimum: 0
         type: integer
       numAcc:
         description: Number of accelerators used (Min > 0)
         example: 2
         minimum: 1
         type: integer
       numHwthreads:
         description: NumCores int32 `json:"numCores" db:"num_cores" example:"20" minimum:"1"` //
           Number of HWThreads used (Min > 0)
         example: 20
         minimum: 1
         type: integer
       numNodes:
         description: Number of nodes used (Min > 0)
         example: 2
         minimum: 1
         type: integer
       partition:
         description: The Slurm partition to which the job was submitted
         example: main
         type: string
       project:
         description: The unique identifier of a project
         example: abcd200
         type: string
       resources:
         description: Resources used by job
         items:
           $ref: '#/definitions/schema.Resource'
         type: array
       smt:
         description: SMT threads used by job
         example: 4
         type: integer
       startTime:
         description: Start epoch time stamp in seconds (Min > 0)
         example: 1649723812
         minimum: 1
         type: integer
       statistics:
         additionalProperties:
           $ref: '#/definitions/schema.JobStatistics'
         description: Metric statistics of job
         type: object
       subCluster:
         description: The unique identifier of a sub cluster
         example: main
         type: string
       tags:
         description: List of tags
         items:
           $ref: '#/definitions/schema.Tag'
         type: array
       user:
         description: The unique identifier of a user
         example: abcd100h
         type: string
       walltime:
         description: Requested walltime of job in seconds (Min > 0)
         example: 86400
         minimum: 1
         type: integer
@@ -486,6 +433,12 @@ definitions:
         type: number
       caution:
         type: number
+      energy:
+        type: string
+      footprint:
+        type: string
+      lowerIsBetter:
+        type: boolean
       name:
         type: string
       normal:
@@ -541,18 +494,14 @@ definitions:
     description: A resource used by a job
     properties:
       accelerators:
-        description: List of of accelerator device ids
         items:
           type: string
         type: array
       configuration:
-        description: The configuration options of the node
         type: string
       hostname:
-        description: Name of the host (= node)
         type: string
       hwthreads:
-        description: List of OS processor ids
         items:
           type: integer
         type: array
@@ -580,6 +529,10 @@ definitions:
         items:
           type: number
         type: array
+      median:
+        items:
+          type: number
+        type: array
       min:
         items:
           type: number
@@ -595,12 +548,24 @@ definitions:
     properties:
       coresPerSocket:
         type: integer
+      energyFootprint:
+        items:
+          type: string
+        type: array
       flopRateScalar:
         $ref: '#/definitions/schema.MetricValue'
       flopRateSimd:
         $ref: '#/definitions/schema.MetricValue'
+      footprint:
+        items:
+          type: string
+        type: array
       memoryBandwidth:
         $ref: '#/definitions/schema.MetricValue'
+      metricConfig:
+        items:
+          $ref: '#/definitions/schema.MetricConfig'
+        type: array
       name:
         type: string
       nodes:
@@ -620,6 +585,12 @@ definitions:
         type: number
       caution:
         type: number
+      energy:
+        type: string
+      footprint:
+        type: string
+      lowerIsBetter:
+        type: boolean
       name:
         type: string
       normal:
@@ -633,14 +604,14 @@ definitions:
     description: Defines a tag using name and type.
     properties:
       id:
         description: The unique DB identifier of a tag
         type: integer
       name:
         description: Tag Name
         example: Testjob
         type: string
+      scope:
+        example: global
+        type: string
       type:
         description: Tag Type
         example: Debug
         type: string
     type: object
@@ -699,7 +670,7 @@ info:
   title: ClusterCockpit REST API
   version: 1.0.0
 paths:
-  /clusters/:
+  /api/clusters/:
     get:
       description: Get a list of all cluster configs. Specific cluster can be requested
         using query parameter.
@@ -736,7 +707,7 @@ paths:
       summary: Lists all cluster configs
       tags:
       - Cluster query
-  /jobs/:
+  /api/jobs/:
     get:
       description: |-
         Get a list of all jobs. Filters can be applied using query parameters.
@@ -801,7 +772,7 @@ paths:
       summary: Lists all jobs
       tags:
       - Job query
-  /jobs/{id}:
+  /api/jobs/{id}:
     get:
       description: |-
         Job to get is specified by database ID
@@ -910,7 +881,7 @@ paths:
       summary: Get job meta and configurable metric data
       tags:
       - Job query
-  /jobs/delete_job/:
+  /api/jobs/delete_job/:
     delete:
       consumes:
       - application/json
@@ -929,7 +900,7 @@ paths:
         "200":
           description: Success message
           schema:
-            $ref: '#/definitions/api.DeleteJobApiResponse'
+            $ref: '#/definitions/api.DefaultJobApiResponse'
         "400":
           description: Bad Request
           schema:
@@ -960,7 +931,7 @@ paths:
       summary: Remove a job from the sql database
       tags:
       - Job remove
-  /jobs/delete_job/{id}:
+  /api/jobs/delete_job/{id}:
     delete:
       description: Job to remove is specified by database ID. This will not remove
         the job from the job archive.
@@ -976,7 +947,7 @@ paths:
         "200":
           description: Success message
           schema:
-            $ref: '#/definitions/api.DeleteJobApiResponse'
+            $ref: '#/definitions/api.DefaultJobApiResponse'
         "400":
           description: Bad Request
           schema:
@@ -1007,7 +978,7 @@ paths:
       summary: Remove a job from the sql database
       tags:
       - Job remove
-  /jobs/delete_job_before/{ts}:
+  /api/jobs/delete_job_before/{ts}:
     delete:
       description: Remove all jobs with start time before timestamp. The jobs will
         not be removed from the job archive.
@@ -1023,7 +994,7 @@ paths:
         "200":
           description: Success message
           schema:
-            $ref: '#/definitions/api.DeleteJobApiResponse'
+            $ref: '#/definitions/api.DefaultJobApiResponse'
         "400":
           description: Bad Request
           schema:
@@ -1054,7 +1025,7 @@ paths:
       summary: Remove a job from the sql database
       tags:
      - Job remove
-  /jobs/edit_meta/{id}:
+  /api/jobs/edit_meta/{id}:
     post:
       consumes:
       - application/json
@@ -1101,7 +1072,7 @@ paths:
       summary: Edit meta-data json
       tags:
       - Job add and modify
-  /jobs/start_job/:
+  /api/jobs/start_job/:
     post:
       consumes:
       - application/json
@@ -1121,7 +1092,7 @@ paths:
         "201":
           description: Job added successfully
           schema:
-            $ref: '#/definitions/api.StartJobApiResponse'
+            $ref: '#/definitions/api.DefaultJobApiResponse'
         "400":
           description: Bad Request
           schema:
@@ -1148,7 +1119,7 @@ paths:
       summary: Adds a new job as "running"
       tags:
       - Job add and modify
-  /jobs/stop_job/:
+  /api/jobs/stop_job/:
     post:
       description: |-
         Job to stop is specified by request body. All fields are required in this case.
@@ -1184,8 +1155,7 @@ paths:
           schema:
             $ref: '#/definitions/api.ErrorResponse'
         "422":
-          description: 'Unprocessable Entity: finding job failed: sql: no rows in
-            result set'
+          description: 'Unprocessable Entity: job has already been stopped'
           schema:
             $ref: '#/definitions/api.ErrorResponse'
         "500":
@@ -1197,68 +1167,13 @@ paths:
       summary: Marks job as completed and triggers archiving
       tags:
       - Job add and modify
-  /jobs/stop_job/{id}:
-    post:
-      consumes:
-      - application/json
-      description: |-
-        Job to stop is specified by database ID. Only stopTime and final state are required in request body.
-        Returns full job resource information according to 'JobMeta' scheme.
-      parameters:
-      - description: Database ID of Job
-        in: path
-        name: id
-        required: true
-        type: integer
-      - description: stopTime and final state in request body
-        in: body
-        name: request
-        required: true
-        schema:
-          $ref: '#/definitions/api.StopJobApiRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Job resource
-          schema:
-            $ref: '#/definitions/schema.JobMeta'
-        "400":
-          description: Bad Request
-          schema:
-            $ref: '#/definitions/api.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/api.ErrorResponse'
-        "403":
-          description: Forbidden
-          schema:
-            $ref: '#/definitions/api.ErrorResponse'
-        "404":
-          description: Resource not found
-          schema:
-            $ref: '#/definitions/api.ErrorResponse'
-        "422":
-          description: 'Unprocessable Entity: finding job failed: sql: no rows in
-            result set'
-          schema:
-            $ref: '#/definitions/api.ErrorResponse'
-        "500":
-          description: Internal Server Error
-          schema:
-            $ref: '#/definitions/api.ErrorResponse'
-      security:
-      - ApiKeyAuth: []
-      summary: Marks job as completed and triggers archiving
-      tags:
-      - Job add and modify
-  /jobs/tag_job/{id}:
+  /api/jobs/tag_job/{id}:
     post:
       consumes:
       - application/json
       description: |-
         Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
+        Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username.
         If tagged job is already finished: Tag will be written directly to respective archive files.
       parameters:
       - description: Job Database ID
@@ -1302,128 +1217,11 @@ paths:
       summary: Adds one or more tags to a job
       tags:
       - Job add and modify
-  /user/{id}:
-    post:
-      consumes:
-      - multipart/form-data
-      description: |-
-        Modifies user defined by username (id) in one of four possible ways.
-        If more than one formValue is set then only the highest priority field is used.
-        Only accessible from IPs registered with apiAllowedIPs configuration option.
-      parameters:
-      - description: Database ID of User
-        in: path
-        name: id
-        required: true
-        type: string
-      - description: 'Priority 1: Role to add'
-        enum:
-        - admin
-        - support
-        - manager
-        - user
-        - api
-        in: formData
-        name: add-role
-        type: string
-      - description: 'Priority 2: Role to remove'
-        enum:
-        - admin
-        - support
-        - manager
-        - user
-        - api
-        in: formData
-        name: remove-role
-        type: string
-      - description: 'Priority 3: Project to add'
-        in: formData
-        name: add-project
-        type: string
-      - description: 'Priority 4: Project to remove'
-        in: formData
-        name: remove-project
-        type: string
-      produces:
-      - text/plain
-      responses:
-        "200":
-          description: Success Response Message
-          schema:
-            type: string
-        "400":
-          description: Bad Request
-          schema:
-            type: string
-        "401":
-          description: Unauthorized
-          schema:
-            type: string
-        "403":
-          description: Forbidden
-          schema:
-            type: string
-        "422":
-          description: 'Unprocessable Entity: The user could not be updated'
-          schema:
-            type: string
-        "500":
-          description: Internal Server Error
-          schema:
-            type: string
-      security:
-      - ApiKeyAuth: []
-      summary: Updates an existing user
-      tags:
-      - User
-  /users/:
-    delete:
-      consumes:
-      - multipart/form-data
-      description: |-
-        User defined by username in form data will be deleted from database.
-        Only accessible from IPs registered with apiAllowedIPs configuration option.
-      parameters:
-      - description: User ID to delete
-        in: formData
-        name: username
-        required: true
-        type: string
-      produces:
-      - text/plain
-      responses:
-        "200":
-          description: User deleted successfully
-        "400":
-          description: Bad Request
-          schema:
-            type: string
-        "401":
-          description: Unauthorized
-          schema:
-            type: string
-        "403":
-          description: Forbidden
-          schema:
-            type: string
-        "422":
-          description: 'Unprocessable Entity: deleting user failed'
-          schema:
-            type: string
-        "500":
-          description: Internal Server Error
-          schema:
-            type: string
-      security:
-      - ApiKeyAuth: []
-      summary: Deletes a user
-      tags:
-      - User
+  /api/users/:
     get:
       description: |-
         Returns a JSON-encoded list of users.
         Required query-parameter defines if all users or only users with additional special roles are returned.
         Only accessible from IPs registered with apiAllowedIPs configuration option.
       parameters:
       - description: If returned list should contain all users or only users with
           additional special roles
@@ -1461,46 +1259,73 @@ paths:
       summary: Returns a list of users
       tags:
       - User
-    post:
+  /jobs/tag_job/{id}:
+    delete:
       consumes:
-      - multipart/form-data
+      - application/json
       description: |-
-        User specified in form data will be saved to database.
-        Only accessible from IPs registered with apiAllowedIPs configuration option.
+        Removes tag(s) from a job specified by DB ID. Name and Type of Tag(s) must match.
+        Tag Scope is required for matching, options: "global", "admin". Private tags can not be deleted via API.
+        If tagged job is already finished: Tag will be removed from respective archive files.
       parameters:
-      - description: Unique user ID
-        in: formData
-        name: username
+      - description: Job Database ID
+        in: path
+        name: id
         required: true
-        type: string
-      - description: User password
-        in: formData
-        name: password
+        type: integer
+      - description: Array of tag-objects to remove
+        in: body
+        name: request
         required: true
-        type: string
-      - description: User role
-        enum:
-        - admin
-        - support
-        - manager
-        - user
-        - api
-        in: formData
-        name: role
+        schema:
+          items:
+            $ref: '#/definitions/api.ApiTag'
+          type: array
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Updated job resource
+          schema:
+            $ref: '#/definitions/schema.Job'
+        "400":
+          description: Bad Request
+          schema:
+            $ref: '#/definitions/api.ErrorResponse'
+        "401":
+          description: Unauthorized
+          schema:
+            $ref: '#/definitions/api.ErrorResponse'
+        "404":
+          description: Job or tag does not exist
+          schema:
+            $ref: '#/definitions/api.ErrorResponse'
+        "500":
+          description: Internal Server Error
+          schema:
+            $ref: '#/definitions/api.ErrorResponse'
+      security:
+      - ApiKeyAuth: []
+      summary: Removes one or more tags from a job
+      tags:
+      - Job add and modify
+  /tags/:
+    delete:
+      consumes:
+      - application/json
+      description: |-
+        Removes tags by type and name. Name and Type of Tag(s) must match.
+        Tag Scope is required for matching, options: "global", "admin". Private tags can not be deleted via API.
+        Tag wills be removed from respective archive files.
+      parameters:
+      - description: Array of tag-objects to remove
+        in: body
+        name: request
         required: true
-        type: string
-      - description: Managed project, required for new manager role user
-        in: formData
-        name: project
-        type: string
-      - description: Users name
-        in: formData
-        name: name
-        type: string
-      - description: Users email
-        in: formData
-        name: email
-        type: string
+        schema:
+          items:
+            $ref: '#/definitions/api.ApiTag'
+          type: array
       produces:
      - text/plain
@@ -1511,28 +1336,24 @@ paths:
         "400":
           description: Bad Request
           schema:
-            type: string
+            $ref: '#/definitions/api.ErrorResponse'
         "401":
           description: Unauthorized
           schema:
-            type: string
-        "403":
-          description: Forbidden
-          schema:
-            type: string
-        "422":
-          description: 'Unprocessable Entity: creating user failed'
-          schema:
-            type: string
+            $ref: '#/definitions/api.ErrorResponse'
+        "404":
+          description: Job or tag does not exist
+          schema:
+            $ref: '#/definitions/api.ErrorResponse'
         "500":
           description: Internal Server Error
           schema:
-            type: string
+            $ref: '#/definitions/api.ErrorResponse'
       security:
       - ApiKeyAuth: []
-      summary: Adds a new user
+      summary: Removes all tags and job-relations for type:name tuple
       tags:
-      - User
+      - Tag remove
 securityDefinitions:
   ApiKeyAuth:
     in: header
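With `startTime` dropped from `api.StopJobApiRequest`, a stop-job call now carries only the fields documented above. A hedged Go sketch of posting to `/api/jobs/stop_job/` (the base URL and token are placeholders, not taken from this diff):

// Sketch: mark a running job as completed via the REST API documented
// in api/swagger.yaml. JSON field names follow api.StopJobApiRequest.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type stopJobRequest struct {
	JobID    int64  `json:"jobId"`
	Cluster  string `json:"cluster"`
	JobState string `json:"jobState"`
	StopTime int64  `json:"stopTime"`
}

func main() {
	body, _ := json.Marshal(stopJobRequest{
		JobID:    123000,       // example values from the swagger docs
		Cluster:  "fritz",
		JobState: "completed",
		StopTime: 1649763839,
	})
	req, _ := http.NewRequest("POST", "http://localhost:8080/api/jobs/stop_job/", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <JWT>") // placeholder token
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // 200 on success, 422 if the job was already stopped
}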
cmd/cc-backend/cli.go (new file) | 33
@@ -0,0 +1,33 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main

import "flag"

var (
	flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
	flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
)

func cliInit() {
	flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize sqlite database file, config.json and .env")
	flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
	flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'hpc_user' table with ldap")
	flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
	flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
	flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
	flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
	flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
	flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
	flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
	flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
	flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
	flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: <username>:[admin,support,manager,api,user]:<password>")
	flag.StringVar(&flagDelUser, "del-user", "", "Remove a existing user. Argument format: <username>")
	flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
	flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
	flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug, info (default), warn, err, crit]`")
	flag.Parse()
}
95
cmd/cc-backend/init.go
Normal file
@ -0,0 +1,95 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main

import (
    "os"

    "github.com/ClusterCockpit/cc-backend/internal/repository"
    "github.com/ClusterCockpit/cc-backend/internal/util"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
)

const envString = `
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
# You can generate your own keypair using the gen-keypair tool
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="

# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
`

const configString = `
{
  "addr": "127.0.0.1:8080",
  "archive": {
    "kind": "file",
    "path": "./var/job-archive"
  },
  "jwts": {
    "max-age": "2000h"
  },
  "apiAllowedIPs": [
    "*"
  ],
  "enable-resampling": {
    "trigger": 30,
    "resolutions": [
      600,
      300,
      120,
      60
    ]
  },
  "clusters": [
    {
      "name": "name",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": ""
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2023-01-01T00:00:00Z",
          "to": null
        }
      }
    }
  ]
}
`

func initEnv() {
    if util.CheckFileExists("var") {
        log.Exit("Directory ./var already exists. Cautiously exiting application initialization.")
    }

    if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
        log.Abortf("Could not write default ./config.json with permissions '0o666'. Application initialization failed, exited.\nError: %s\n", err.Error())
    }

    if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
        log.Abortf("Could not write default ./.env file with permissions '0o666'. Application initialization failed, exited.\nError: %s\n", err.Error())
    }

    if err := os.Mkdir("var", 0o777); err != nil {
        log.Abortf("Could not create default ./var folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error())
    }

    err := repository.MigrateDB("sqlite3", "./var/job.db")
    if err != nil {
        log.Abortf("Could not initialize default sqlite3 database as './var/job.db'. Application initialization failed, exited.\nError: %s\n", err.Error())
    }
}
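A minimal test sketch, assuming it sits next to init.go in package main, that checks the embedded configString above is valid JSON before -init writes it to disk; the test name is illustrative and not part of the commit.

package main

import (
    "encoding/json"
    "testing"
)

func TestDefaultConfigIsValidJSON(t *testing.T) {
    // configString is the package-level constant defined in init.go above.
    var cfg map[string]any
    if err := json.Unmarshal([]byte(configString), &cfg); err != nil {
        t.Fatalf("embedded default config is not valid JSON: %v", err)
    }
    if _, ok := cfg["clusters"]; !ok {
        t.Fatal("embedded default config lacks a 'clusters' key")
    }
}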
@ -5,155 +5,49 @@
package main

import (
    "context"
    "crypto/tls"
    "encoding/json"
    "errors"
    "flag"
    "fmt"
    "io"
    "net"
    "net/http"
    "os"
    "os/signal"
    "runtime"
    "runtime/debug"
    "strings"
    "sync"
    "syscall"
    "time"

    "github.com/99designs/gqlgen/graphql/handler"
    "github.com/99designs/gqlgen/graphql/playground"
    "github.com/ClusterCockpit/cc-backend/internal/api"
    "github.com/ClusterCockpit/cc-backend/internal/archiver"
    "github.com/ClusterCockpit/cc-backend/internal/auth"
    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-backend/internal/graph"
    "github.com/ClusterCockpit/cc-backend/internal/graph/generated"
    "github.com/ClusterCockpit/cc-backend/internal/importer"
    "github.com/ClusterCockpit/cc-backend/internal/metricdata"
    "github.com/ClusterCockpit/cc-backend/internal/repository"
    "github.com/ClusterCockpit/cc-backend/internal/routerConfig"
    "github.com/ClusterCockpit/cc-backend/internal/runtimeEnv"
    "github.com/ClusterCockpit/cc-backend/internal/util"
    "github.com/ClusterCockpit/cc-backend/internal/taskManager"
    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
    "github.com/ClusterCockpit/cc-backend/web"
    "github.com/go-co-op/gocron"
    "github.com/google/gops/agent"
    "github.com/gorilla/handlers"
    "github.com/gorilla/mux"
    httpSwagger "github.com/swaggo/http-swagger"
    "github.com/joho/godotenv"

    _ "github.com/go-sql-driver/mysql"
    _ "github.com/mattn/go-sqlite3"
)

const logoString = `
 ____ _           _             ____           _          _ _
/ ___| |_   _ ___| |_ ___ _ __ / ___|___   ___| | ___ __ (_) |_
 _____ _           _             ____           _          _ _
/ ___| |_   _ ___| |_ ___ _ __ / ___|___   ___| | ___ __ (_) |_
| |   | | | | / __| __/ _ \ '__| |   / _ \ / __| |/ / '_ \| | __|
| |___| | |_| \__ \ ||  __/ |  | |__| (_) | (__|   <| |_) | | |_
 \____|_|\__,_|___/\__\___|_|   \____\___/ \___|_|\_\ .__/|_|\__|
 \_____|_|\__,_|___/\__\___|_|   \____\___/ \___|_|\_\ .__/|_|\__|
                                                     |_|
`

const envString = `
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
# You can generate your own keypair using the gen-keypair tool
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="

# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
`

const configString = `
{
  "addr": "127.0.0.1:8080",
  "archive": {
    "kind": "file",
    "path": "./var/job-archive"
  },
  "clusters": [
    {
      "name": "name",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": ""
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2023-01-01T00:00:00Z",
          "to": null
        }
      }
    }
  ]
}
`

var (
    date    string
    commit  string
    version string
)

func initEnv() {
    if util.CheckFileExists("var") {
        fmt.Print("Directory ./var already exists. Exiting!\n")
        os.Exit(0)
    }

    if err := os.WriteFile("config.json", []byte(configString), 0666); err != nil {
        log.Fatalf("Writing config.json failed: %s", err.Error())
    }

    if err := os.WriteFile(".env", []byte(envString), 0666); err != nil {
        log.Fatalf("Writing .env failed: %s", err.Error())
    }

    if err := os.Mkdir("var", 0777); err != nil {
        log.Fatalf("Mkdir var failed: %s", err.Error())
    }

    err := repository.MigrateDB("sqlite3", "./var/job.db")
    if err != nil {
        log.Fatalf("Initialize job.db failed: %s", err.Error())
    }
}

func main() {
    var flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
    var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
    flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize sqlite database file, config.json and .env")
    flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
    flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
    flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
    flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
    flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
    flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
    flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
    flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
    flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
    flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
    flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
    flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
    flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
    flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
    flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
    flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
    flag.Parse()
    cliInit()

    if flagVersion {
        fmt.Print(logoString)
@ -168,23 +62,24 @@ func main() {
    // Apply config flags for pkg/log
    log.Init(flagLogLevel, flagLogDateTime)

    // If init flag set, run tasks here before any file dependencies cause errors
    if flagInit {
        initEnv()
        fmt.Print("Successfully setup environment!\n")
        fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
        fmt.Print("Add your job-archive at ./var/job-archive.\n")
        os.Exit(0)
        log.Exit("Successfully setup environment!\n" +
            "Please review config.json and .env and adjust it to your needs.\n" +
            "Add your job-archive at ./var/job-archive.")
    }

    // See https://github.com/google/gops (Runtime overhead is almost zero)
    if flagGops {
        if err := agent.Listen(agent.Options{}); err != nil {
            log.Fatalf("gops/agent.Listen failed: %s", err.Error())
            log.Abortf("Could not start gops agent with 'gops/agent.Listen(agent.Options{})'. Application startup failed, exited.\nError: %s\n", err.Error())
        }
    }

    if err := runtimeEnv.LoadEnv("./.env"); err != nil && !os.IsNotExist(err) {
        log.Fatalf("parsing './.env' file failed: %s", err.Error())
    err := godotenv.Load()
    if err != nil {
        log.Abortf("Could not parse existing .env file at location './.env'. Application startup failed, exited.\nError: %s\n", err.Error())
    }

    // Initialize sub-modules and handle command line flags.
@ -202,341 +97,134 @@ func main() {
    if flagMigrateDB {
        err := repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB)
        if err != nil {
            log.Fatal(err)
            log.Abortf("MigrateDB Failed: Could not migrate '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, repository.Version, err.Error())
        }
        os.Exit(0)
        log.Exitf("MigrateDB Success: Migrated '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, repository.Version)
    }

    if flagRevertDB {
        err := repository.RevertDB(config.Keys.DBDriver, config.Keys.DB)
        if err != nil {
            log.Fatal(err)
            log.Abortf("RevertDB Failed: Could not revert '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, (repository.Version - 1), err.Error())
        }
        os.Exit(0)
        log.Exitf("RevertDB Success: Reverted '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, (repository.Version - 1))
    }

    if flagForceDB {
        err := repository.ForceDB(config.Keys.DBDriver, config.Keys.DB)
        if err != nil {
            log.Fatal(err)
            log.Abortf("ForceDB Failed: Could not force '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, repository.Version, err.Error())
        }
        os.Exit(0)
        log.Exitf("ForceDB Success: Forced '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, repository.Version)
    }

    repository.Connect(config.Keys.DBDriver, config.Keys.DB)
    db := repository.GetConnection()

    var authentication *auth.Authentication
    if !config.Keys.DisableAuthentication {
        var err error
        if authentication, err = auth.Init(); err != nil {
            log.Fatalf("auth initialization failed: %v", err)
        }

        if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
            authentication.SessionMaxAge = d
        }
        auth.Init()

        if flagNewUser != "" {
            parts := strings.SplitN(flagNewUser, ":", 3)
            if len(parts) != 3 || len(parts[0]) == 0 {
                log.Fatal("invalid argument format for user creation")
                log.Abortf("Add User: Could not parse supplied argument format: No changes.\n"+
                    "Want: <username>:[admin,support,manager,api,user]:<password>\n"+
                    "Have: %s\n", flagNewUser)
            }

            ur := repository.GetUserRepository()
            if err := ur.AddUser(&schema.User{
                Username: parts[0], Projects: make([]string, 0), Password: parts[2], Roles: strings.Split(parts[1], ","),
            }); err != nil {
                log.Fatalf("adding '%s' user authentication failed: %v", parts[0], err)
                log.Abortf("Add User: Could not add new user authentication for '%s' and roles '%s'.\nError: %s\n", parts[0], parts[1], err.Error())
            } else {
                log.Printf("Add User: Added new user '%s' with roles '%s'.\n", parts[0], parts[1])
            }
        }

        if flagDelUser != "" {
            ur := repository.GetUserRepository()
            if err := ur.DelUser(flagDelUser); err != nil {
                log.Fatalf("deleting user failed: %v", err)
                log.Abortf("Delete User: Could not delete user '%s' from DB.\nError: %s\n", flagDelUser, err.Error())
            } else {
                log.Printf("Delete User: Deleted user '%s' from DB.\n", flagDelUser)
            }
        }

        authHandle := auth.GetAuthInstance()

        if flagSyncLDAP {
            if authentication.LdapAuth == nil {
                log.Fatal("cannot sync: LDAP authentication is not configured")
            if authHandle.LdapAuth == nil {
                log.Abort("Sync LDAP: LDAP authentication is not configured, could not synchronize. No changes, exited.")
            }

            if err := authentication.LdapAuth.Sync(); err != nil {
                log.Fatalf("LDAP sync failed: %v", err)
            if err := authHandle.LdapAuth.Sync(); err != nil {
                log.Abortf("Sync LDAP: Could not synchronize, failed with error.\nError: %s\n", err.Error())
            }
            log.Info("LDAP sync successfull")
            log.Print("Sync LDAP: LDAP synchronization successful.")
        }

        if flagGenJWT != "" {
            ur := repository.GetUserRepository()
            user, err := ur.GetUser(flagGenJWT)
            if err != nil {
                log.Fatalf("could not get user from JWT: %v", err)
                log.Abortf("JWT: Could not get supplied user '%s' from DB. No changes, exited.\nError: %s\n", flagGenJWT, err.Error())
            }

            if !user.HasRole(schema.RoleApi) {
                log.Warnf("user '%s' does not have the API role", user.Username)
                log.Warnf("JWT: User '%s' does not have the role 'api'. REST API endpoints will return error!\n", user.Username)
            }

            jwt, err := authentication.JwtAuth.ProvideJWT(user)
            jwt, err := authHandle.JwtAuth.ProvideJWT(user)
            if err != nil {
                log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
                log.Abortf("JWT: User '%s' found in DB, but failed to provide JWT.\nError: %s\n", user.Username, err.Error())
            }

            fmt.Printf("MAIN > JWT for '%s': %s\n", user.Username, jwt)
            log.Printf("JWT: Successfully generated JWT for user '%s': %s\n", user.Username, jwt)
        }

    } else if flagNewUser != "" || flagDelUser != "" {
        log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled")
        log.Abort("Error: Arguments '--add-user' and '--del-user' can only be used if authentication is enabled. No changes, exited.")
    }

    if err := archive.Init(config.Keys.Archive, config.Keys.DisableArchive); err != nil {
        log.Fatalf("failed to initialize archive: %s", err.Error())
        log.Abortf("Init: Failed to initialize archive.\nError: %s\n", err.Error())
    }

    if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
        log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
    if err := metricdata.Init(); err != nil {
        log.Abortf("Init: Failed to initialize metricdata repository.\nError %s\n", err.Error())
    }

    if flagReinitDB {
        if err := importer.InitDB(); err != nil {
            log.Fatalf("failed to re-initialize repository DB: %s", err.Error())
            log.Abortf("Init DB: Failed to re-initialize repository DB.\nError: %s\n", err.Error())
        } else {
            log.Print("Init DB: Successfully re-initialized repository DB.")
        }
    }

    if flagImportJob != "" {
        if err := importer.HandleImportFlag(flagImportJob); err != nil {
            log.Fatalf("job import failed: %s", err.Error())
            log.Abortf("Import Job: Job import failed.\nError: %s\n", err.Error())
        } else {
            log.Printf("Import Job: Imported Job '%s' into DB.\n", flagImportJob)
        }
    }

    if !flagServer {
        return
        log.Exit("No errors, server flag not set. Exiting cc-backend.")
    }

    // Setup the http.Handler/Router used by the server
    jobRepo := repository.GetJobRepository()
    resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
    graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
    if os.Getenv("DEBUG") != "1" {
        // Having this handler means that an error message is returned via GraphQL instead of the connection simply being closed.
        // The problem with this is that then, no more stacktrace is printed to stderr.
        graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
            switch e := err.(type) {
            case string:
                return fmt.Errorf("MAIN > Panic: %s", e)
            case error:
                return fmt.Errorf("MAIN > Panic caused by: %w", e)
            }

            return errors.New("MAIN > Internal server error (panic)")
        })
    }

    api := &api.RestApi{
        JobRepository:   jobRepo,
        Resolver:        resolver,
        MachineStateDir: config.Keys.MachineStateDir,
        Authentication:  authentication,
    }

    r := mux.NewRouter()
    buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}

    info := map[string]interface{}{}
    info["hasOpenIDConnect"] = false

    if config.Keys.OpenIDConfig != nil {
        openIDConnect := auth.NewOIDC(authentication)
        openIDConnect.RegisterEndpoints(r)
        info["hasOpenIDConnect"] = true
    }

    r.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
        rw.Header().Add("Content-Type", "text/html; charset=utf-8")
        log.Debugf("##%v##", info)
        web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
    }).Methods(http.MethodGet)
    r.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
        rw.Header().Add("Content-Type", "text/html; charset=utf-8")
        web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
    })
    r.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
        rw.Header().Add("Content-Type", "text/html; charset=utf-8")
        web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
    })

    secured := r.PathPrefix("/").Subrouter()

    if !config.Keys.DisableAuthentication {
        r.Handle("/login", authentication.Login(
            // On success:
            http.RedirectHandler("/", http.StatusTemporaryRedirect),

            // On failure:
            func(rw http.ResponseWriter, r *http.Request, err error) {
                rw.Header().Add("Content-Type", "text/html; charset=utf-8")
                rw.WriteHeader(http.StatusUnauthorized)
                web.RenderTemplate(rw, "login.tmpl", &web.Page{
                    Title:   "Login failed - ClusterCockpit",
                    MsgType: "alert-warning",
                    Message: err.Error(),
                    Build:   buildInfo,
                    Infos:   info,
                })
            })).Methods(http.MethodPost)

        r.Handle("/jwt-login", authentication.Login(
            // On success:
            http.RedirectHandler("/", http.StatusTemporaryRedirect),

            // On failure:
            func(rw http.ResponseWriter, r *http.Request, err error) {
                rw.Header().Add("Content-Type", "text/html; charset=utf-8")
                rw.WriteHeader(http.StatusUnauthorized)
                web.RenderTemplate(rw, "login.tmpl", &web.Page{
                    Title:   "Login failed - ClusterCockpit",
                    MsgType: "alert-warning",
                    Message: err.Error(),
                    Build:   buildInfo,
                    Infos:   info,
                })
            }))

        r.Handle("/logout", authentication.Logout(
            http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
                rw.Header().Add("Content-Type", "text/html; charset=utf-8")
                rw.WriteHeader(http.StatusOK)
                web.RenderTemplate(rw, "login.tmpl", &web.Page{
                    Title:   "Bye - ClusterCockpit",
                    MsgType: "alert-info",
                    Message: "Logout successful",
                    Build:   buildInfo,
                    Infos:   info,
                })
            }))).Methods(http.MethodPost)

        secured.Use(func(next http.Handler) http.Handler {
            return authentication.Auth(
                // On success:
                next,

                // On failure:
                func(rw http.ResponseWriter, r *http.Request, err error) {
                    rw.WriteHeader(http.StatusUnauthorized)
                    web.RenderTemplate(rw, "login.tmpl", &web.Page{
                        Title:   "Authentication failed - ClusterCockpit",
                        MsgType: "alert-danger",
                        Message: err.Error(),
                        Build:   buildInfo,
                        Infos:   info,
                    })
                })
        })
    }

    if flagDev {
        r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
        r.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
            httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
    }
    secured.Handle("/query", graphQLEndpoint)

    // Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
    secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
        routerConfig.HandleSearchBar(rw, r, buildInfo)
    })

    // Mount all /monitoring/... and /api/... routes.
    routerConfig.SetupRoutes(secured, buildInfo)
    api.MountRoutes(secured)

    if config.Keys.EmbedStaticFiles {
        if i, err := os.Stat("./var/img"); err == nil {
            if i.IsDir() {
                log.Info("Use local directory for static images")
                r.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
            }
        }
        r.PathPrefix("/").Handler(web.ServeFiles())
    } else {
        r.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
    }

    r.Use(handlers.CompressHandler)
    r.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
    r.Use(handlers.CORS(
        handlers.AllowCredentials(),
        handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
        handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
        handlers.AllowedOrigins([]string{"*"})))
    handler := handlers.CustomLoggingHandler(io.Discard, r, func(_ io.Writer, params handlers.LogFormatterParams) {
        if strings.HasPrefix(params.Request.RequestURI, "/api/") {
            log.Debugf("%s %s (%d, %.02fkb, %dms)",
                params.Request.Method, params.URL.RequestURI(),
                params.StatusCode, float32(params.Size)/1024,
                time.Since(params.TimeStamp).Milliseconds())
        } else {
            log.Debugf("%s %s (%d, %.02fkb, %dms)",
                params.Request.Method, params.URL.RequestURI(),
                params.StatusCode, float32(params.Size)/1024,
                time.Since(params.TimeStamp).Milliseconds())
        }
    })
    archiver.Start(repository.GetJobRepository())
    taskManager.Start()
    serverInit()

    var wg sync.WaitGroup
    server := http.Server{
        ReadTimeout:  10 * time.Second,
        WriteTimeout: 10 * time.Second,
        Handler:      handler,
        Addr:         config.Keys.Addr,
    }

    // Start http or https server
    listener, err := net.Listen("tcp", config.Keys.Addr)
    if err != nil {
        log.Fatalf("starting http listener failed: %v", err)
    }

    if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
        go func() {
            http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
        }()
    }

    if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
        cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
        if err != nil {
            log.Fatalf("loading X509 keypair failed: %v", err)
        }
        listener = tls.NewListener(listener, &tls.Config{
            Certificates: []tls.Certificate{cert},
            CipherSuites: []uint16{
                tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
                tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
            },
            MinVersion:               tls.VersionTLS12,
            PreferServerCipherSuites: true,
        })
        fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
    } else {
        fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
    }

    // Because this program will want to bind to a privileged port (like 80), the listener must
    // be established first, then the user can be changed, and after that,
    // the actual http server can be started.
    if err = runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
        log.Fatalf("error while preparing server start: %s", err.Error())
    }

    wg.Add(1)
    go func() {
        defer wg.Done()
        if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
            log.Fatalf("starting server failed: %v", err)
        }
    serverStart()
    }()

    wg.Add(1)
@ -547,117 +235,15 @@ func main() {
        <-sigs
        runtimeEnv.SystemdNotifiy(false, "Shutting down ...")

        // First shut down the server gracefully (waiting for all ongoing requests)
        server.Shutdown(context.Background())
        serverShutdown()

        // Then, wait for any async archivings still pending...
        api.JobRepository.WaitForArchiving()
        taskManager.Shutdown()
    }()

    s := gocron.NewScheduler(time.Local)

    if config.Keys.StopJobsExceedingWalltime > 0 {
        log.Info("Register undead jobs service")

        s.Every(1).Day().At("3:00").Do(func() {
            err = jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
            if err != nil {
                log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
            }
            runtime.GC()
        })
    }

    var cfg struct {
        Retention   schema.Retention `json:"retention"`
        Compression int              `json:"compression"`
    }

    cfg.Retention.IncludeDB = true

    if err = json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
        log.Warn("Error while unmarshaling raw config json")
    }

    switch cfg.Retention.Policy {
    case "delete":
        log.Info("Register retention delete service")

        s.Every(1).Day().At("4:00").Do(func() {
            startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
            jobs, err := jobRepo.FindJobsBetween(0, startTime)
            if err != nil {
                log.Warnf("Error while looking for retention jobs: %s", err.Error())
            }
            archive.GetHandle().CleanUp(jobs)

            if cfg.Retention.IncludeDB {
                cnt, err := jobRepo.DeleteJobsBefore(startTime)
                if err != nil {
                    log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
                } else {
                    log.Infof("Retention: Removed %d jobs from db", cnt)
                }
                if err = jobRepo.Optimize(); err != nil {
                    log.Errorf("Error occured in db optimization: %s", err.Error())
                }
            }
        })
    case "move":
        log.Info("Register retention move service")

        s.Every(1).Day().At("4:00").Do(func() {
            startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
            jobs, err := jobRepo.FindJobsBetween(0, startTime)
            if err != nil {
                log.Warnf("Error while looking for retention jobs: %s", err.Error())
            }
            archive.GetHandle().Move(jobs, cfg.Retention.Location)

            if cfg.Retention.IncludeDB {
                cnt, err := jobRepo.DeleteJobsBefore(startTime)
                if err != nil {
                    log.Errorf("Error while deleting retention jobs from db: %v", err)
                } else {
                    log.Infof("Retention: Removed %d jobs from db", cnt)
                }
                if err = jobRepo.Optimize(); err != nil {
                    log.Errorf("Error occured in db optimization: %v", err)
                }
            }
        })
    }

    if cfg.Compression > 0 {
        log.Info("Register compression service")

        s.Every(1).Day().At("5:00").Do(func() {
            var jobs []*schema.Job

            ar := archive.GetHandle()
            startTime := time.Now().Unix() - int64(cfg.Compression*24*3600)
            lastTime := ar.CompressLast(startTime)
            if startTime == lastTime {
                log.Info("Compression Service - Complete archive run")
                jobs, err = jobRepo.FindJobsBetween(0, startTime)

            } else {
                jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
            }

            if err != nil {
                log.Warnf("Error while looking for compression jobs: %v", err)
            }
            ar.Compress(jobs)
        })
    }

    s.StartAsync()

    if os.Getenv("GOGC") == "" {
        debug.SetGCPercent(25)
    }
    runtimeEnv.SystemdNotifiy(true, "running")
    wg.Wait()
    log.Print("Gracefull shutdown completed!")
    log.Print("Graceful shutdown completed!")
}
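The shutdown path above blocks on a signal channel and then calls server.Shutdown before waiting for pending archivings. A self-contained sketch of that pattern using only the standard library; the address is illustrative:

package main

import (
    "context"
    "net/http"
    "os"
    "os/signal"
    "syscall"
)

func main() {
    srv := &http.Server{Addr: "127.0.0.1:8080"}
    go srv.ListenAndServe() // serve in the background

    // Block until SIGINT or SIGTERM arrives.
    sigs := make(chan os.Signal, 1)
    signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
    <-sigs

    // Stop accepting new connections and wait for in-flight requests.
    srv.Shutdown(context.Background())
}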
330
cmd/cc-backend/server.go
Normal file
@ -0,0 +1,330 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main

import (
    "context"
    "crypto/tls"
    "encoding/json"
    "errors"
    "fmt"
    "io"
    "net"
    "net/http"
    "os"
    "strings"
    "time"

    "github.com/99designs/gqlgen/graphql/handler"
    "github.com/99designs/gqlgen/graphql/handler/transport"
    "github.com/99designs/gqlgen/graphql/playground"
    "github.com/ClusterCockpit/cc-backend/internal/api"
    "github.com/ClusterCockpit/cc-backend/internal/archiver"
    "github.com/ClusterCockpit/cc-backend/internal/auth"
    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-backend/internal/graph"
    "github.com/ClusterCockpit/cc-backend/internal/graph/generated"
    "github.com/ClusterCockpit/cc-backend/internal/routerConfig"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
    "github.com/ClusterCockpit/cc-backend/web"
    "github.com/gorilla/handlers"
    "github.com/gorilla/mux"
    httpSwagger "github.com/swaggo/http-swagger"
)

var (
    router    *mux.Router
    server    *http.Server
    apiHandle *api.RestApi
)

func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
    rw.Header().Add("Content-Type", "application/json")
    rw.WriteHeader(http.StatusUnauthorized)
    json.NewEncoder(rw).Encode(map[string]string{
        "status": http.StatusText(http.StatusUnauthorized),
        "error":  err.Error(),
    })
}

func serverInit() {
    // Setup the http.Handler/Router used by the server
    graph.Init()
    resolver := graph.GetResolverInstance()
    graphQLServer := handler.New(
        generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))

    // graphQLServer.AddTransport(transport.SSE{})
    graphQLServer.AddTransport(transport.POST{})
    // graphQLServer.AddTransport(transport.Websocket{
    //     KeepAlivePingInterval: 10 * time.Second,
    //     Upgrader: websocket.Upgrader{
    //         CheckOrigin: func(r *http.Request) bool {
    //             return true
    //         },
    //     },
    // })

    if os.Getenv("DEBUG") != "1" {
        // Having this handler means that an error message is returned via GraphQL instead of the connection simply being closed.
        // The problem with this is that then, no more stacktrace is printed to stderr.
        graphQLServer.SetRecoverFunc(func(ctx context.Context, err any) error {
            switch e := err.(type) {
            case string:
                return fmt.Errorf("MAIN > Panic: %s", e)
            case error:
                return fmt.Errorf("MAIN > Panic caused by: %s", e.Error())
            }

            return errors.New("MAIN > Internal server error (panic)")
        })
    }

    authHandle := auth.GetAuthInstance()

    apiHandle = api.New()

    router = mux.NewRouter()
    buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}

    info := map[string]any{}
    info["hasOpenIDConnect"] = false

    if config.Keys.OpenIDConfig != nil {
        openIDConnect := auth.NewOIDC(authHandle)
        openIDConnect.RegisterEndpoints(router)
        info["hasOpenIDConnect"] = true
    }

    router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
        rw.Header().Add("Content-Type", "text/html; charset=utf-8")
        log.Debugf("##%v##", info)
        web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
    }).Methods(http.MethodGet)
    router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
        rw.Header().Add("Content-Type", "text/html; charset=utf-8")
        web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
    })
    router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
        rw.Header().Add("Content-Type", "text/html; charset=utf-8")
        web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
    })

    secured := router.PathPrefix("/").Subrouter()
    securedapi := router.PathPrefix("/api").Subrouter()
    userapi := router.PathPrefix("/userapi").Subrouter()
    configapi := router.PathPrefix("/config").Subrouter()
    frontendapi := router.PathPrefix("/frontend").Subrouter()

    if !config.Keys.DisableAuthentication {
        router.Handle("/login", authHandle.Login(
            // On success: Handled within Login()
            // On failure:
            func(rw http.ResponseWriter, r *http.Request, err error) {
                rw.Header().Add("Content-Type", "text/html; charset=utf-8")
                rw.WriteHeader(http.StatusUnauthorized)
                web.RenderTemplate(rw, "login.tmpl", &web.Page{
                    Title:   "Login failed - ClusterCockpit",
                    MsgType: "alert-warning",
                    Message: err.Error(),
                    Build:   buildInfo,
                    Infos:   info,
                })
            })).Methods(http.MethodPost)

        router.Handle("/jwt-login", authHandle.Login(
            // On success: Handled within Login()
            // On failure:
            func(rw http.ResponseWriter, r *http.Request, err error) {
                rw.Header().Add("Content-Type", "text/html; charset=utf-8")
                rw.WriteHeader(http.StatusUnauthorized)
                web.RenderTemplate(rw, "login.tmpl", &web.Page{
                    Title:   "Login failed - ClusterCockpit",
                    MsgType: "alert-warning",
                    Message: err.Error(),
                    Build:   buildInfo,
                    Infos:   info,
                })
            }))

        router.Handle("/logout", authHandle.Logout(
            http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
                rw.Header().Add("Content-Type", "text/html; charset=utf-8")
                rw.WriteHeader(http.StatusOK)
                web.RenderTemplate(rw, "login.tmpl", &web.Page{
                    Title:   "Bye - ClusterCockpit",
                    MsgType: "alert-info",
                    Message: "Logout successful",
                    Build:   buildInfo,
                    Infos:   info,
                })
            }))).Methods(http.MethodPost)

        secured.Use(func(next http.Handler) http.Handler {
            return authHandle.Auth(
                // On success:
                next,

                // On failure:
                func(rw http.ResponseWriter, r *http.Request, err error) {
                    rw.WriteHeader(http.StatusUnauthorized)
                    web.RenderTemplate(rw, "login.tmpl", &web.Page{
                        Title:    "Authentication failed - ClusterCockpit",
                        MsgType:  "alert-danger",
                        Message:  err.Error(),
                        Build:    buildInfo,
                        Infos:    info,
                        Redirect: r.RequestURI,
                    })
                })
        })

        securedapi.Use(func(next http.Handler) http.Handler {
            return authHandle.AuthApi(
                // On success:
                next,
                // On failure: JSON Response
                onFailureResponse)
        })

        userapi.Use(func(next http.Handler) http.Handler {
            return authHandle.AuthUserApi(
                // On success:
                next,
                // On failure: JSON Response
                onFailureResponse)
        })

        configapi.Use(func(next http.Handler) http.Handler {
            return authHandle.AuthConfigApi(
                // On success:
                next,
                // On failure: JSON Response
                onFailureResponse)
        })

        frontendapi.Use(func(next http.Handler) http.Handler {
            return authHandle.AuthFrontendApi(
                // On success:
                next,
                // On failure: JSON Response
                onFailureResponse)
        })
    }

    if flagDev {
        router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
        router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
            httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
    }
    secured.Handle("/query", graphQLServer)

    // Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
    secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
        routerConfig.HandleSearchBar(rw, r, buildInfo)
    })

    // Mount all /monitoring/... and /api/... routes.
    routerConfig.SetupRoutes(secured, buildInfo)
    apiHandle.MountApiRoutes(securedapi)
    apiHandle.MountUserApiRoutes(userapi)
    apiHandle.MountConfigApiRoutes(configapi)
    apiHandle.MountFrontendApiRoutes(frontendapi)

    if config.Keys.EmbedStaticFiles {
        if i, err := os.Stat("./var/img"); err == nil {
            if i.IsDir() {
                log.Info("Use local directory for static images")
                router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
            }
        }
        router.PathPrefix("/").Handler(web.ServeFiles())
    } else {
        router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
    }

    router.Use(handlers.CompressHandler)
    router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
    router.Use(handlers.CORS(
        handlers.AllowCredentials(),
        handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
        handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
        handlers.AllowedOrigins([]string{"*"})))
}

func serverStart() {
    handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
        if strings.HasPrefix(params.Request.RequestURI, "/api/") {
            log.Debugf("%s %s (%d, %.02fkb, %dms)",
                params.Request.Method, params.URL.RequestURI(),
                params.StatusCode, float32(params.Size)/1024,
                time.Since(params.TimeStamp).Milliseconds())
        } else {
            log.Debugf("%s %s (%d, %.02fkb, %dms)",
                params.Request.Method, params.URL.RequestURI(),
                params.StatusCode, float32(params.Size)/1024,
                time.Since(params.TimeStamp).Milliseconds())
        }
    })

    server = &http.Server{
        ReadTimeout:  20 * time.Second,
        WriteTimeout: 20 * time.Second,
        Handler:      handler,
        Addr:         config.Keys.Addr,
    }

    // Start http or https server
    listener, err := net.Listen("tcp", config.Keys.Addr)
    if err != nil {
        log.Abortf("Server Start: Starting http listener on '%s' failed.\nError: %s\n", config.Keys.Addr, err.Error())
    }

    if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
        go func() {
            http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
        }()
    }

    if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
        cert, err := tls.LoadX509KeyPair(
            config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
        if err != nil {
            log.Abortf("Server Start: Loading X509 keypair failed. Check options 'https-cert-file' and 'https-key-file' in 'config.json'.\nError: %s\n", err.Error())
        }
        listener = tls.NewListener(listener, &tls.Config{
            Certificates: []tls.Certificate{cert},
            CipherSuites: []uint16{
                tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
                tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
            },
            MinVersion:               tls.VersionTLS12,
            PreferServerCipherSuites: true,
        })
        log.Printf("HTTPS server listening at %s...\n", config.Keys.Addr)
    } else {
        log.Printf("HTTP server listening at %s...\n", config.Keys.Addr)
    }

    // Because this program will want to bind to a privileged port (like 80), the listener must
    // be established first, then the user can be changed, and after that,
    // the actual http server can be started.
    if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
        log.Abortf("Server Start: Error while preparing server start.\nError: %s\n", err.Error())
    }

    if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
        log.Abortf("Server Start: Starting server failed.\nError: %s\n", err.Error())
    }
}

func serverShutdown() {
    // First shut down the server gracefully (waiting for all ongoing requests)
    server.Shutdown(context.Background())

    // Then, wait for any async archivings still pending...
    archiver.WaitForArchiving()
}
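serverInit above wires one auth middleware per sub-router via Use(). A self-contained sketch of that gorilla/mux pattern, with a made-up header check standing in for authHandle:

package main

import (
    "net/http"

    "github.com/gorilla/mux"
)

func main() {
    router := mux.NewRouter()
    api := router.PathPrefix("/api").Subrouter()

    // Middleware runs before every handler registered on this sub-router.
    api.Use(func(next http.Handler) http.Handler {
        return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
            if r.Header.Get("Authorization") == "" { // placeholder check
                http.Error(rw, "unauthorized", http.StatusUnauthorized)
                return
            }
            next.ServeHTTP(rw, r)
        })
    })

    api.HandleFunc("/ping", func(rw http.ResponseWriter, r *http.Request) {
        rw.Write([]byte("pong"))
    })

    http.ListenAndServe("127.0.0.1:8080", router)
}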
@ -1,56 +1,70 @@
{
  "addr": "127.0.0.1:8080",
  "archive": {
    "kind": "file",
    "path": "./var/job-archive"
  },
  "jwts": {
    "max-age": "2000h"
  },
  "clusters": [
    {
      "name": "fritz",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": ""
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2022-01-01T00:00:00Z",
          "to": null
        }
      }
    },
    {
      "name": "alex",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": ""
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2022-01-01T00:00:00Z",
          "to": null
        }
      }
    }
  "addr": "127.0.0.1:8080",
  "short-running-jobs-duration": 300,
  "archive": {
    "kind": "file",
    "path": "./var/job-archive"
  },
  "jwts": {
    "max-age": "2000h"
  },
  "enable-resampling": {
    "trigger": 30,
    "resolutions": [
      600,
      300,
      120,
      60
    ]
  },
  "apiAllowedIPs": [
    "*"
  ],
  "emission-constant": 317,
  "clusters": [
    {
      "name": "fritz",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": ""
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2022-01-01T00:00:00Z",
          "to": null
        }
      }
    },
    {
      "name": "alex",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": ""
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2022-01-01T00:00:00Z",
          "to": null
        }
      }
    }
  ]
}
69
configs/config-mariadb.json
Normal file
@ -0,0 +1,69 @@
{
  "addr": "127.0.0.1:8080",
  "short-running-jobs-duration": 300,
  "archive": {
    "kind": "file",
    "path": "./var/job-archive"
  },
  "jwts": {
    "max-age": "2000h"
  },
  "db-driver": "mysql",
  "db": "clustercockpit:demo@tcp(127.0.0.1:3306)/clustercockpit",
  "enable-resampling": {
    "trigger": 30,
    "resolutions": [
      600,
      300,
      120,
      60
    ]
  },
  "emission-constant": 317,
  "clusters": [
    {
      "name": "fritz",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": ""
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2022-01-01T00:00:00Z",
          "to": null
        }
      }
    },
    {
      "name": "alex",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": ""
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2022-01-01T00:00:00Z",
          "to": null
        }
      }
    }
  ]
}
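With "db-driver": "mysql", the DSN above is in the form database/sql expects from go-sql-driver/mysql. A minimal connectivity sketch; the credentials are the demo values copied from the config:

package main

import (
    "database/sql"
    "fmt"

    _ "github.com/go-sql-driver/mysql"
)

func main() {
    db, err := sql.Open("mysql", "clustercockpit:demo@tcp(127.0.0.1:3306)/clustercockpit")
    if err != nil {
        panic(err)
    }
    defer db.Close()

    // sql.Open is lazy; Ping actually checks connectivity.
    if err := db.Ping(); err != nil {
        fmt.Println("connection failed:", err)
        return
    }
    fmt.Println("connected")
}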
@ -1,50 +1,62 @@
{
  "addr": "0.0.0.0:443",
  "ldap": {
    "url": "ldaps://test",
    "user_base": "ou=people,ou=hpc,dc=test,dc=de",
    "search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de",
    "user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de",
    "user_filter": "(&(objectclass=posixAccount))"
  },
  "https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
  "https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
  "user": "clustercockpit",
  "group": "clustercockpit",
  "archive": {
    "kind": "file",
    "path": "./var/job-archive"
  },
  "validate": true,
  "clusters": [
    {
      "name": "test",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": "eyJhbGciOiJF-E-pQBQ"
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2022-01-01T00:00:00Z",
          "to": null
        }
      }
  "addr": "0.0.0.0:443",
  "ldap": {
    "url": "ldaps://test",
    "user_base": "ou=people,ou=hpc,dc=test,dc=de",
    "search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de",
    "user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de",
    "user_filter": "(&(objectclass=posixAccount))"
  },
  "https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
  "https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
  "user": "clustercockpit",
  "group": "clustercockpit",
  "archive": {
    "kind": "file",
    "path": "./var/job-archive"
  },
  "validate": false,
  "apiAllowedIPs": [
    "*"
  ],
  "clusters": [
    {
      "name": "test",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": "eyJhbGciOiJF-E-pQBQ"
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2022-01-01T00:00:00Z",
          "to": null
        }
      }
  ],
  "jwts": {
    "cookieName": "",
    "validateUser": false,
    "max-age": "2000h",
    "trustedIssuer": ""
  },
  "short-running-jobs-duration": 300
}
    }
  ],
  "jwts": {
    "cookieName": "",
    "validateUser": false,
    "max-age": "2000h",
    "trustedIssuer": ""
  },
  "enable-resampling": {
    "trigger": 30,
    "resolutions": [
      600,
      300,
      120,
      60
    ]
  },
  "short-running-jobs-duration": 300
}
12
configs/default_metrics.json
Normal file
@ -0,0 +1,12 @@
{
  "clusters": [
    {
      "name": "fritz",
      "default_metrics": "cpu_load, flops_any, core_power, lustre_open, mem_used, mem_bw, net_bytes_in"
    },
    {
      "name": "alex",
      "default_metrics": "flops_any, mem_bw, mem_used, vectorization_ratio"
    }
  ]
}
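A hedged sketch of reading configs/default_metrics.json into Go; the struct mirrors the JSON above, while the program itself is illustrative and not part of the commit:

package main

import (
    "encoding/json"
    "fmt"
    "os"
)

// defaultMetricsConfig mirrors the shape of configs/default_metrics.json.
type defaultMetricsConfig struct {
    Clusters []struct {
        Name           string `json:"name"`
        DefaultMetrics string `json:"default_metrics"`
    } `json:"clusters"`
}

func main() {
    raw, err := os.ReadFile("configs/default_metrics.json")
    if err != nil {
        fmt.Println("read failed:", err)
        return
    }
    var cfg defaultMetricsConfig
    if err := json.Unmarshal(raw, &cfg); err != nil {
        fmt.Println("parse failed:", err)
        return
    }
    for _, c := range cfg.Clusters {
        fmt.Printf("%s -> %s\n", c.Name, c.DefaultMetrics)
    }
}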
@ -117,10 +117,12 @@ foreach my $ln (split("\n", $topo)) {
my $node;
my @sockets;
my @nodeCores;
foreach my $socket ( @{$DOMAINS{socket}} ) {
    push @sockets, "[".join(",", @{$socket})."]";
    $node .= join(",", @{$socket})
    push @nodeCores, join(",", @{$socket});
}
$node = join(",", @nodeCores);
$INFO{sockets} = join(",\n", @sockets);

my @memDomains;
@ -212,9 +214,27 @@ print <<"END";
    "socketsPerNode": $INFO{socketsPerNode},
    "coresPerSocket": $INFO{coresPerSocket},
    "threadsPerCore": $INFO{threadsPerCore},
    "flopRateScalar": $flopsScalar,
    "flopRateSimd": $flopsSimd,
    "memoryBandwidth": $memBw,
    "flopRateScalar": {
        "unit": {
            "base": "F/s",
            "prefix": "G"
        },
        "value": $flopsScalar
    },
    "flopRateSimd": {
        "unit": {
            "base": "F/s",
            "prefix": "G"
        },
        "value": $flopsSimd
    },
    "memoryBandwidth": {
        "unit": {
            "base": "B/s",
            "prefix": "G"
        },
        "value": $memBw
    },
    "nodes": "<FILL IN NODE RANGES>",
    "topology": {
        "node": [$node],
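The hunk above replaces the bare flopRateScalar, flopRateSimd, and memoryBandwidth numbers with unit-wrapped objects. A sketch of the matching Go shape; the struct and field names are assumptions, as the authoritative definitions live in pkg/schema:

package main

import (
    "encoding/json"
    "fmt"
)

type unit struct {
    Base   string `json:"base"`
    Prefix string `json:"prefix,omitempty"`
}

type metricValue struct {
    Unit  unit    `json:"unit"`
    Value float64 `json:"value"`
}

func main() {
    raw := []byte(`{"unit":{"base":"F/s","prefix":"G"},"value":42}`)
    var v metricValue
    if err := json.Unmarshal(raw, &v); err != nil {
        panic(err)
    }
    fmt.Printf("%g %s%s\n", v.Value, v.Unit.Prefix, v.Unit.Base) // 42 GF/s
}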
113
go.mod
@ -1,91 +1,92 @@
module github.com/ClusterCockpit/cc-backend

go 1.18
go 1.23.5

toolchain go1.24.1

require (
github.com/99designs/gqlgen v0.17.45
github.com/99designs/gqlgen v0.17.66
github.com/ClusterCockpit/cc-units v0.4.0
github.com/Masterminds/squirrel v1.5.3
github.com/coreos/go-oidc/v3 v3.9.0
github.com/go-co-op/gocron v1.25.0
github.com/go-ldap/ldap/v3 v3.4.4
github.com/go-sql-driver/mysql v1.7.0
github.com/golang-jwt/jwt/v5 v5.2.1
github.com/golang-migrate/migrate/v4 v4.15.2
github.com/google/gops v0.3.27
github.com/gorilla/handlers v1.5.1
github.com/gorilla/mux v1.8.0
github.com/gorilla/sessions v1.2.1
github.com/influxdata/influxdb-client-go/v2 v2.12.2
github.com/jmoiron/sqlx v1.3.5
github.com/mattn/go-sqlite3 v1.14.16
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/common v0.40.0
github.com/Masterminds/squirrel v1.5.4
github.com/coreos/go-oidc/v3 v3.12.0
github.com/go-co-op/gocron/v2 v2.16.0
github.com/go-ldap/ldap/v3 v3.4.10
github.com/go-sql-driver/mysql v1.9.0
github.com/golang-jwt/jwt/v5 v5.2.2
github.com/golang-migrate/migrate/v4 v4.18.2
github.com/google/gops v0.3.28
github.com/gorilla/handlers v1.5.2
github.com/gorilla/mux v1.8.1
github.com/gorilla/sessions v1.4.0
github.com/influxdata/influxdb-client-go/v2 v2.14.0
github.com/jmoiron/sqlx v1.4.0
github.com/mattn/go-sqlite3 v1.14.24
github.com/prometheus/client_golang v1.21.0
github.com/prometheus/common v0.62.0
github.com/qustavo/sqlhooks/v2 v2.1.0
github.com/santhosh-tekuri/jsonschema/v5 v5.2.0
github.com/swaggo/http-swagger v1.3.3
github.com/swaggo/swag v1.16.3
github.com/vektah/gqlparser/v2 v2.5.11
golang.org/x/crypto v0.21.0
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea
golang.org/x/oauth2 v0.13.0
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.16.4
github.com/vektah/gqlparser/v2 v2.5.22
golang.org/x/crypto v0.35.0
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa
golang.org/x/oauth2 v0.27.0
golang.org/x/time v0.5.0
)

require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
github.com/KyleBanks/depth v1.2.1 // indirect
github.com/agnivade/levenshtein v1.1.1 // indirect
github.com/agnivade/levenshtein v1.2.1 // indirect
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/containerd/containerd v1.6.26 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/deepmap/oapi-codegen v1.12.4 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
github.com/go-jose/go-jose/v3 v3.0.3 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
github.com/go-jose/go-jose/v4 v4.0.5 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/securecookie v1.1.1 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/gorilla/securecookie v1.1.2 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
github.com/joho/godotenv v1.5.1 // indirect
github.com/jonboulle/clockwork v0.5.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/procfs v0.9.0 // indirect
github.com/oapi-codegen/runtime v1.1.1 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sosodev/duration v1.2.0 // indirect
github.com/swaggo/files v1.0.0 // indirect
github.com/urfave/cli/v2 v2.27.1 // indirect
github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect
go.uber.org/atomic v1.10.0 // indirect
golang.org/x/mod v0.16.0 // indirect
golang.org/x/net v0.22.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/tools v0.19.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect
google.golang.org/protobuf v1.33.0 // indirect
github.com/sosodev/duration v1.3.1 // indirect
github.com/swaggo/files v1.0.1 // indirect
github.com/urfave/cli/v2 v2.27.5 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
go.uber.org/atomic v1.11.0 // indirect
golang.org/x/mod v0.23.0 // indirect
golang.org/x/net v0.36.0 // indirect
golang.org/x/sync v0.11.0 // indirect
golang.org/x/sys v0.30.0 // indirect
golang.org/x/text v0.22.0 // indirect
golang.org/x/tools v0.30.0 // indirect
google.golang.org/protobuf v1.36.5 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
62
gqlgen.yml
@ -30,6 +30,7 @@ resolver:
# gqlgen will search for any type names in the schema in these go packages
# if they match it will use them, otherwise it will generate them.
autobind:
  - "github.com/99designs/gqlgen/graphql/introspection"
  - "github.com/ClusterCockpit/cc-backend/internal/graph/model"

# This section declares type mapping between the GraphQL and go type systems

@ -61,23 +62,50 @@ models:
    fields:
      partitions:
        resolver: true
  NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
  MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
  MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
  JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
  NullableFloat:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
  MetricScope:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
  MetricValue:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
  JobStatistics:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
  GlobalMetricListItem:
    {
      model: "github.com/ClusterCockpit/cc-backend/pkg/schema.GlobalMetricListItem",
    }
  ClusterSupport:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.ClusterSupport" }
  Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
  Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
  JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
  TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
  IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
  JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
  Resource:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
  JobState:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
  TimeRange:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
  IntRange:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
  JobMetric:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
  Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
  MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" }
  MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
  SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" }
  Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
  Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
  FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
  SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
  StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
  MetricStatistics:
    {
      model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics",
    }
  MetricConfig:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
  SubClusterConfig:
    {
      model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig",
    }
  Accelerator:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
  Topology:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
  FilterRanges:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
  SubCluster:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
  StatsSeries:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
  Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }
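These model entries bind GraphQL types to existing Go types from pkg/schema instead of letting gqlgen generate them. For a bound custom scalar, gqlgen expects the Go type to implement its marshaling contract; a minimal sketch of what such a binding target could look like (illustrative only, not the project's actual schema.Float implementation):

import (
	"fmt"
	"io"
	"strconv"
)

// Float is a sketch of a gqlgen-bindable custom scalar.
type Float float64

// MarshalGQL writes the value in GraphQL wire format.
func (f Float) MarshalGQL(w io.Writer) {
	io.WriteString(w, strconv.FormatFloat(float64(f), 'f', -1, 64))
}

// UnmarshalGQL parses an incoming GraphQL value.
func (f *Float) UnmarshalGQL(v any) error {
	val, ok := v.(float64)
	if !ok {
		return fmt.Errorf("Float must be a number, got %T", v)
	}
	*f = Float(val)
	return nil
}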
@ -1,5 +1,5 @@
[Unit]
Description=ClusterCockpit Web Server (Go edition)
Description=ClusterCockpit Web Server
Documentation=https://github.com/ClusterCockpit/cc-backend
Wants=network-online.target
After=network-online.target
@ -14,13 +14,16 @@ import (
"os"
"path/filepath"
"reflect"
"strconv"
"strings"
"testing"
"time"

"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@ -42,6 +45,9 @@ func setup(t *testing.T) *api.RestApi {
"jwts": {
"max-age": "2m"
},
"apiAllowedIPs": [
"*"
],
"clusters": [
{
"name": "testcluster",
@ -117,7 +123,7 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err)
}

if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
t.Fatal(err)
}

@ -144,23 +150,20 @@ func setup(t *testing.T) *api.RestApi {
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)

repository.Connect("sqlite3", dbfilepath)
db := repository.GetConnection()

if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
t.Fatal(err)
}

if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
if err := metricdata.Init(); err != nil {
t.Fatal(err)
}

jobRepo := repository.GetJobRepository()
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
archiver.Start(repository.GetJobRepository())
auth.Init()
graph.Init()

return &api.RestApi{
JobRepository: resolver.Repo,
Resolver: resolver,
}
return api.New()
}

func cleanup() {
@ -175,7 +178,6 @@ func cleanup() {
func TestRestApi(t *testing.T) {
restapi := setup(t)
t.Cleanup(cleanup)

testData := schema.JobData{
"load_one": map[schema.MetricScope]*schema.JobMetric{
schema.MetricScopeNode: {
@ -192,12 +194,18 @@ func TestRestApi(t *testing.T) {
},
}

metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
return testData, nil
}

r := mux.NewRouter()
restapi.MountRoutes(r)
r.PathPrefix("/api").Subrouter()
r.StrictSlash(true)
restapi.MountApiRoutes(r)

var TestJobId int64 = 123
var TestClusterName string = "testcluster"
var TestStartTime int64 = 123456789

const startJobBody string = `{
"jobId": 123,
@ -213,7 +221,7 @@ func TestRestApi(t *testing.T) {
"exclusive": 1,
"monitoringStatus": 1,
"smt": 1,
"tags": [{ "type": "testTagType", "name": "testTagName" }],
"tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
"resources": [
{
"hostname": "host123",
@ -224,28 +232,33 @@ func TestRestApi(t *testing.T) {
"startTime": 123456789
}`

var dbid int64
const contextUserKey repository.ContextKey = "user"
contextUserValue := &schema.User{
Username: "testuser",
Projects: make([]string, 0),
Roles: []string{"user"},
AuthType: 0,
AuthSource: 2,
}

if ok := t.Run("StartJob", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
recorder := httptest.NewRecorder()

r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)

r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String())
}

var res api.StartJobApiResponse
if err := json.Unmarshal(recorder.Body.Bytes(), &res); err != nil {
t.Fatal(err)
}

job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(res.DBID)))
resolver := graph.GetResolverInstance()
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil {
t.Fatal(err)
}

job.Tags, err = restapi.Resolver.Job().Tags(context.Background(), job)
job.Tags, err = resolver.Job().Tags(ctx, job)
if err != nil {
t.Fatal(err)
}
@ -269,11 +282,9 @@ func TestRestApi(t *testing.T) {
t.Fatalf("unexpected job properties: %#v", job)
}

if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" {
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
t.Fatalf("unexpected tags: %#v", job.Tags)
}

dbid = res.DBID
}); !ok {
return
}
@ -289,17 +300,19 @@ func TestRestApi(t *testing.T) {

var stoppedJob *schema.Job
if ok := t.Run("StopJob", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
recorder := httptest.NewRecorder()

r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)

r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusOK {
t.Fatal(response.Status, recorder.Body.String())
}

restapi.JobRepository.WaitForArchiving()
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(dbid)))
archiver.WaitForArchiving()
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil {
t.Fatal(err)
}
@ -327,7 +340,7 @@ func TestRestApi(t *testing.T) {
}

t.Run("CheckArchive", func(t *testing.T) {
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
if err != nil {
t.Fatal(err)
}
@ -341,10 +354,12 @@ func TestRestApi(t *testing.T) {
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far apart!
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)

req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(body)))
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
recorder := httptest.NewRecorder()

r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)

r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusUnprocessableEntity {
t.Fatal(response.Status, recorder.Body.String())
@ -371,10 +386,12 @@ func TestRestApi(t *testing.T) {
}`

ok := t.Run("StartJobFailed", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
recorder := httptest.NewRecorder()

r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)

r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String())
@ -384,8 +401,10 @@ func TestRestApi(t *testing.T) {
t.Fatal("subtest failed")
}

time.Sleep(1 * time.Second)

const stopJobBodyFailed string = `{
"jobId": 12345,
"jobId": 12345,
"cluster": "testcluster",

"jobState": "failed",
@ -393,16 +412,18 @@ func TestRestApi(t *testing.T) {
}`

ok = t.Run("StopJobFailed", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
recorder := httptest.NewRecorder()

r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)

r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusOK {
t.Fatal(response.Status, recorder.Body.String())
}

restapi.JobRepository.WaitForArchiving()
archiver.WaitForArchiving()
jobid, cluster := int64(12345), "testcluster"
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
if err != nil {
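The recurring change in this test file is that every request now carries an authenticated user in its context before being served. Isolated, the pattern (using the identifiers defined in the test above) is:

// Sketch of the request-scoped user injection used throughout the updated tests.
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx)) // handlers can read the user back from the context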
File diff suppressed because it is too large
94
internal/archiver/archiveWorker.go
Normal file
@ -0,0 +1,94 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver

import (
	"context"
	"sync"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/repository"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	sq "github.com/Masterminds/squirrel"
)

var (
	archivePending sync.WaitGroup
	archiveChannel chan *schema.Job
	jobRepo        *repository.JobRepository
)

func Start(r *repository.JobRepository) {
	archiveChannel = make(chan *schema.Job, 128)
	jobRepo = r

	go archivingWorker()
}

// Archiving worker thread
func archivingWorker() {
	for {
		select {
		case job, ok := <-archiveChannel:
			if !ok {
				break
			}
			start := time.Now()
			// not using meta data, called to load JobMeta into Cache?
			// will fail if job meta not in repository
			if _, err := jobRepo.FetchMetadata(job); err != nil {
				log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
				jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
				continue
			}

			// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
			// TODO: Maybe use context with cancel/timeout here
			jobMeta, err := ArchiveJob(job, context.Background())
			if err != nil {
				log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
				jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
				continue
			}

			stmt := sq.Update("job").Where("job.id = ?", job.ID)

			if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
				log.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
				continue
			}
			if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
				log.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
				continue
			}
			// Update the jobs database entry one last time:
			stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
			if err := jobRepo.Execute(stmt); err != nil {
				log.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
				continue
			}
			log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
			log.Printf("archiving job (dbid: %d) successful", job.ID)
			archivePending.Done()
		}
	}
}

// Trigger async archiving
func TriggerArchiving(job *schema.Job) {
	if archiveChannel == nil {
		log.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
	}

	archivePending.Add(1)
	archiveChannel <- job
}

// Wait for background thread to finish pending archiving operations
func WaitForArchiving() {
	// close channel and wait for worker to process remaining jobs
	archivePending.Wait()
}
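The three exported functions form a small asynchronous pipeline: Start launches the worker goroutine, TriggerArchiving enqueues a job and bumps the pending counter, and WaitForArchiving blocks until that counter drains. A short usage sketch, mirroring how the test code in this commit drives it:

archiver.Start(repository.GetJobRepository()) // spawn the background worker once at startup

// ... whenever a job stops:
archiver.TriggerArchiving(job) // non-blocking; increments archivePending and enqueues

// ... before asserting on archived results or shutting down:
archiver.WaitForArchiving() // blocks until the worker has drained all pending jobs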
83
internal/archiver/archiver.go
Normal file
@ -0,0 +1,83 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver

import (
	"context"
	"math"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
	allMetrics := make([]string, 0)
	metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
	for _, mc := range metricConfigs {
		allMetrics = append(allMetrics, mc.Name)
	}

	scopes := []schema.MetricScope{schema.MetricScopeNode}
	// FIXME: Add a config option for this
	if job.NumNodes <= 8 {
		// This will add the native scope if core scope is not available
		scopes = append(scopes, schema.MetricScopeCore)
	}

	if job.NumAcc > 0 {
		scopes = append(scopes, schema.MetricScopeAccelerator)
	}

	jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // a resolution value of 0 retrieves the highest resolution (60s)
	if err != nil {
		log.Error("Error while loading job data for archiving")
		return nil, err
	}

	jobMeta := &schema.JobMeta{
		BaseJob:    job.BaseJob,
		StartTime:  job.StartTime.Unix(),
		Statistics: make(map[string]schema.JobStatistics),
	}

	for metric, data := range jobData {
		avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
		nodeData, ok := data["node"]
		if !ok {
			// This should never happen ?
			continue
		}

		for _, series := range nodeData.Series {
			avg += series.Statistics.Avg
			min = math.Min(min, series.Statistics.Min)
			max = math.Max(max, series.Statistics.Max)
		}

		// Round AVG Result to 2 Digits
		jobMeta.Statistics[metric] = schema.JobStatistics{
			Unit: schema.Unit{
				Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
				Base:   archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
			},
			Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
			Min: min,
			Max: max,
		}
	}

	// If the file based archive is disabled,
	// only return the JobMeta structure as the
	// statistics in there are needed.
	if config.Keys.DisableArchive {
		return jobMeta, nil
	}

	return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
}
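The per-metric average above sums the node series averages, divides by the node count, and rounds to two decimals. A worked example of that arithmetic:

package main

import (
	"fmt"
	"math"
)

func main() {
	// Two node series with Avg 3.1 and 4.356 on a 2-node job.
	sum := 3.1 + 4.356                     // 7.456
	avg := math.Round((sum/2.0)*100) / 100 // 372.8 rounds to 373, i.e. 3.73
	fmt.Println(avg)                       // 3.73
}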
@ -10,12 +10,19 @@ import (
"database/sql"
"encoding/base64"
"errors"
"fmt"
"net"
"net/http"
"os"
"strings"
"sync"
"time"

"golang.org/x/time/rate"

"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/gorilla/sessions"
@ -26,6 +33,24 @@ type Authenticator interface {
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
}

var (
initOnce sync.Once
authInstance *Authentication
)

var ipUserLimiters sync.Map

func getIPUserLimiter(ip, username string) *rate.Limiter {
key := ip + ":" + username
limiter, ok := ipUserLimiters.Load(key)
if !ok {
newLimiter := rate.NewLimiter(rate.Every(time.Hour/10), 10)
ipUserLimiters.Store(key, newLimiter)
return newLimiter
}
return limiter.(*rate.Limiter)
}
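rate.NewLimiter(rate.Every(time.Hour/10), 10) gives each IP and username pair a token bucket with a burst of 10 and a refill of one token every six minutes: ten immediate attempts, then one more every six minutes. A small sketch of that behavior with the same golang.org/x/time/rate API:

package main

import (
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// Same parameters as getIPUserLimiter above: burst 10, one token per six minutes.
	lim := rate.NewLimiter(rate.Every(time.Hour/10), 10)
	for i := 1; i <= 11; i++ {
		fmt.Println(i, lim.Allow()) // attempts 1-10 print true, attempt 11 prints false
	}
}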
type Authentication struct {
sessionStore *sessions.CookieStore
LdapAuth *LdapAuthenticator
@ -62,82 +87,111 @@ func (auth *Authentication) AuthViaSession(
}, nil
}

func Init() (*Authentication, error) {
auth := &Authentication{}
func Init() {
initOnce.Do(func() {
authInstance = &Authentication{}

sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Error("Error while initializing authentication -> decoding session key failed")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
}

if config.Keys.LdapConfig != nil {
ldapAuth := &LdapAuthenticator{}
if err := ldapAuth.Init(); err != nil {
log.Warn("Error while initializing authentication -> ldapAuth init failed")
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
} else {
auth.LdapAuth = ldapAuth
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
}
} else {
log.Info("Missing LDAP configuration: No LDAP support!")
}

if config.Keys.JwtConfig != nil {
auth.JwtAuth = &JWTAuthenticator{}
if err := auth.JwtAuth.Init(); err != nil {
log.Error("Error while initializing authentication -> jwtAuth init failed")
return nil, err
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Fatal("Error while initializing authentication -> decoding session key failed")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
}

jwtSessionAuth := &JWTSessionAuthenticator{}
if err := jwtSessionAuth.Init(); err != nil {
log.Info("jwtSessionAuth init failed: No JWT login support!")
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err == nil {
authInstance.SessionMaxAge = d
}

if config.Keys.LdapConfig != nil {
ldapAuth := &LdapAuthenticator{}
if err := ldapAuth.Init(); err != nil {
log.Warn("Error while initializing authentication -> ldapAuth init failed")
} else {
authInstance.LdapAuth = ldapAuth
authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
}
} else {
auth.authenticators = append(auth.authenticators, jwtSessionAuth)
log.Info("Missing LDAP configuration: No LDAP support!")
}

jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
if err := jwtCookieSessionAuth.Init(); err != nil {
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
if config.Keys.JwtConfig != nil {
authInstance.JwtAuth = &JWTAuthenticator{}
if err := authInstance.JwtAuth.Init(); err != nil {
log.Fatal("Error while initializing authentication -> jwtAuth init failed")
}

jwtSessionAuth := &JWTSessionAuthenticator{}
if err := jwtSessionAuth.Init(); err != nil {
log.Info("jwtSessionAuth init failed: No JWT login support!")
} else {
authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
}

jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
if err := jwtCookieSessionAuth.Init(); err != nil {
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
} else {
authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
}
} else {
auth.authenticators = append(auth.authenticators, jwtCookieSessionAuth)
log.Info("Missing JWT configuration: No JWT token support!")
}
} else {
log.Info("Missing JWT configuration: No JWT token support!")
}

auth.LocalAuth = &LocalAuthenticator{}
if err := auth.LocalAuth.Init(); err != nil {
log.Error("Error while initializing authentication -> localAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LocalAuth)

return auth, nil
authInstance.LocalAuth = &LocalAuthenticator{}
if err := authInstance.LocalAuth.Init(); err != nil {
log.Fatal("Error while initializing authentication -> localAuth init failed")
}
authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
})
}

func persistUser(user *schema.User) {
func GetAuthInstance() *Authentication {
if authInstance == nil {
log.Fatal("Authentication module not initialized!")
}

return authInstance
}
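Init is now idempotent (guarded by sync.Once) and aborts via log.Fatal instead of returning an error, so callers no longer check a return value. The assumed call pattern replacing the old `auth, err := auth.Init()` is:

auth.Init()                 // safe to call more than once; setup runs exactly once
a := auth.GetAuthInstance() // terminates the process if Init was never called
_ = a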
func handleTokenUser(tokenUser *schema.User) {
r := repository.GetUserRepository()
_, err := r.GetUser(user.Username)
dbUser, err := r.GetUser(tokenUser.Username)

if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%s': %v", user.Username, err)
} else if err == sql.ErrNoRows {
if err := r.AddUser(user); err != nil {
log.Errorf("Error while adding user '%s' to DB: %v", user.Username, err)
log.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
} else if err == sql.ErrNoRows && config.Keys.JwtConfig.SyncUserOnLogin { // Adds New User
if err := r.AddUser(tokenUser); err != nil {
log.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
}
} else if err == nil && config.Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
if err := r.UpdateUser(dbUser, tokenUser); err != nil {
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
}
}
}

func handleOIDCUser(OIDCUser *schema.User) {
r := repository.GetUserRepository()
dbUser, err := r.GetUser(OIDCUser.Username)

if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%s': %v", OIDCUser.Username, err)
} else if err == sql.ErrNoRows && config.Keys.OpenIDConfig.SyncUserOnLogin { // Adds New User
if err := r.AddUser(OIDCUser); err != nil {
log.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, err)
}
} else if err == nil && config.Keys.OpenIDConfig.UpdateUserOnLogin { // Update Existing User
if err := r.UpdateUser(dbUser, OIDCUser); err != nil {
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
}
}
}
@ -153,6 +207,10 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
if auth.SessionMaxAge != 0 {
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
}
if config.Keys.HttpsCertFile == "" && config.Keys.HttpsKeyFile == "" {
session.Options.Secure = false
}
session.Options.SameSite = http.SameSiteStrictMode
session.Values["username"] = user.Username
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
@ -166,13 +224,24 @@
}

func (auth *Authentication) Login(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
username := r.FormValue("username")
var dbUser *schema.User
ip, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
ip = r.RemoteAddr
}

username := r.FormValue("username")

limiter := getIPUserLimiter(ip, username)
if !limiter.Allow() {
log.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
onfailure(rw, r, errors.New("Too many login attempts, try again in a few minutes."))
return
}

var dbUser *schema.User
if username != "" {
var err error
dbUser, err = repository.GetUserRepository().GetUser(username)
@ -203,7 +272,13 @@ func (auth *Authentication) Login(

log.Infof("login successful: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))

if r.FormValue("redirect") != "" {
http.RedirectHandler(r.FormValue("redirect"), http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
return
}

http.RedirectHandler("/", http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
return
}

@ -219,31 +294,150 @@ func (auth *Authentication) Auth(
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
log.Infof("authentication failed: %s", err.Error())
log.Infof("auth -> authentication failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnauthorized)
return
}

if user == nil {
user, err = auth.AuthViaSession(rw, r)
if err != nil {
log.Infof("authentication failed: %s", err.Error())
log.Infof("auth -> authentication failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnauthorized)
return
}
}

if user != nil {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}

log.Debug("authentication failed")
log.Info("auth -> authentication failed")
onfailure(rw, r, errors.New("unauthorized (please login first)"))
})
}

func (auth *Authentication) AuthApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
log.Infof("auth api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}

ipErr := securedCheck(user, r)
if ipErr != nil {
log.Infof("auth api -> secured check failed: %s", ipErr.Error())
onfailure(rw, r, ipErr)
return
}

if user != nil {
switch {
case len(user.Roles) == 1:
if user.HasRole(schema.RoleApi) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
case len(user.Roles) >= 2:
if user.HasAllRoles([]schema.Role{schema.RoleAdmin, schema.RoleApi}) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
default:
log.Info("auth api -> authentication failed: missing role")
onfailure(rw, r, errors.New("unauthorized"))
}
}
log.Info("auth api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}

func (auth *Authentication) AuthUserApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
log.Infof("auth user api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}

if user != nil {
switch {
case len(user.Roles) == 1:
if user.HasRole(schema.RoleApi) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
case len(user.Roles) >= 2:
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
default:
log.Info("auth user api -> authentication failed: missing role")
onfailure(rw, r, errors.New("unauthorized"))
}
}
log.Info("auth user api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}

func (auth *Authentication) AuthConfigApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.AuthViaSession(rw, r)
if err != nil {
log.Infof("auth config api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil && user.HasRole(schema.RoleAdmin) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
log.Info("auth config api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}

func (auth *Authentication) AuthFrontendApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.AuthViaSession(rw, r)
if err != nil {
log.Infof("auth frontend api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
log.Info("auth frontend api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}

func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
session, err := auth.sessionStore.Get(r, "session")
@ -263,3 +457,38 @@ func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
onsuccess.ServeHTTP(rw, r)
})
}

// Helper Moved To MiddleWare Auth Handlers
func securedCheck(user *schema.User, r *http.Request) error {
if user == nil {
return fmt.Errorf("no user for secured check")
}

// extract IP address for checking
IPAddress := r.Header.Get("X-Real-Ip")
if IPAddress == "" {
IPAddress = r.Header.Get("X-Forwarded-For")
}
if IPAddress == "" {
IPAddress = r.RemoteAddr
}

if strings.Contains(IPAddress, ":") {
IPAddress = strings.Split(IPAddress, ":")[0]
}

// If nothing declared in config: deny all requests to this api endpoint
if len(config.Keys.ApiAllowedIPs) == 0 {
return fmt.Errorf("missing configuration key ApiAllowedIPs")
}
// If wildcard declared in config: Continue
if config.Keys.ApiAllowedIPs[0] == "*" {
return nil
}
// check if IP is allowed
if !util.Contains(config.Keys.ApiAllowedIPs, IPAddress) {
return fmt.Errorf("unknown ip: %v", IPAddress)
}

return nil
}
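securedCheck therefore has three outcomes, driven entirely by config.Keys.ApiAllowedIPs: an empty list denies every request, a leading "*" allows everything (as the test configuration in this commit does), and otherwise the extracted client IP must appear in the list. Sketched:

// Illustrative settings for the three cases handled above.
config.Keys.ApiAllowedIPs = nil                  // deny: missing configuration key
config.Keys.ApiAllowedIPs = []string{"*"}        // wildcard: every client passes
config.Keys.ApiAllowedIPs = []string{"10.0.0.5"} // allowlist: only this IP passes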
@ -198,8 +198,8 @@ func (ja *JWTCookieSessionAuthenticator) Login(
AuthSource: schema.AuthViaToken,
}

if jc.SyncUserOnLogin {
persistUser(user)
if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
handleTokenUser(user)
}
}
@ -138,8 +138,8 @@ func (ja *JWTSessionAuthenticator) Login(
AuthSource: schema.AuthViaToken,
}

if config.Keys.JwtConfig.SyncUserOnLogin {
persistUser(user)
if config.Keys.JwtConfig.SyncUserOnLogin || config.Keys.JwtConfig.UpdateUserOnLogin {
handleTokenUser(user)
}
}
@ -10,7 +10,6 @@ import (
"net/http"
"os"
"strings"
"time"

"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
@ -34,33 +33,6 @@ func (la *LdapAuthenticator) Init() error {

lc := config.Keys.LdapConfig

if lc.SyncInterval != "" {
interval, err := time.ParseDuration(lc.SyncInterval)
if err != nil {
log.Warnf("Could not parse duration for sync interval: %v",
lc.SyncInterval)
return err
}

if interval == 0 {
log.Info("Sync interval is zero")
return nil
}

go func() {
ticker := time.NewTicker(interval)
for t := range ticker.C {
log.Printf("sync started at %s", t.Format(time.RFC3339))
if err := la.Sync(); err != nil {
log.Errorf("sync failed: %s", err.Error())
}
log.Print("sync done")
}
}()
} else {
log.Info("LDAP configuration key sync_interval invalid")
}

if lc.UserAttr != "" {
la.UserAttr = lc.UserAttr
} else {
@ -168,8 +168,8 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
AuthSource: schema.AuthViaOIDC,
}

if config.Keys.OpenIDConfig.SyncUserOnLogin {
persistUser(user)
if config.Keys.OpenIDConfig.SyncUserOnLogin || config.Keys.OpenIDConfig.UpdateUserOnLogin {
handleOIDCUser(user)
}

oa.authentication.SaveSession(rw, r, user)
@ -7,9 +7,9 @@ package config
import (
"bytes"
"encoding/json"
"log"
"os"

"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

@ -29,10 +29,9 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_showFootprint": true,
"job_list_usePaging": true,
"job_list_usePaging": false,
"plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3,
@ -54,20 +53,20 @@ func Init(flagConfigFile string) {
raw, err := os.ReadFile(flagConfigFile)
if err != nil {
if !os.IsNotExist(err) {
log.Fatalf("CONFIG ERROR: %v", err)
log.Abortf("Config Init: Could not read config file '%s'.\nError: %s\n", flagConfigFile, err.Error())
}
} else {
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
log.Fatalf("Validate config: %v\n", err)
log.Abortf("Config Init: Could not validate config file '%s'.\nError: %s\n", flagConfigFile, err.Error())
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
log.Fatalf("could not decode: %v", err)
log.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", flagConfigFile, err.Error())
}

if Keys.Clusters == nil || len(Keys.Clusters) < 1 {
log.Fatal("At least one cluster required in config!")
log.Abort("Config Init: At least one cluster required in config. Exited with error.")
}
}
}
44
internal/config/default_metrics.go
Normal file
@ -0,0 +1,44 @@
package config

import (
	"encoding/json"
	"os"
	"strings"
)

type DefaultMetricsCluster struct {
	Name           string `json:"name"`
	DefaultMetrics string `json:"default_metrics"`
}

type DefaultMetricsConfig struct {
	Clusters []DefaultMetricsCluster `json:"clusters"`
}

func LoadDefaultMetricsConfig() (*DefaultMetricsConfig, error) {
	filePath := "default_metrics.json"
	if _, err := os.Stat(filePath); os.IsNotExist(err) {
		return nil, nil
	}
	data, err := os.ReadFile(filePath)
	if err != nil {
		return nil, err
	}
	var cfg DefaultMetricsConfig
	if err := json.Unmarshal(data, &cfg); err != nil {
		return nil, err
	}
	return &cfg, nil
}

func ParseMetricsString(s string) []string {
	parts := strings.Split(s, ",")
	var metrics []string
	for _, p := range parts {
		trimmed := strings.TrimSpace(p)
		if trimmed != "" {
			metrics = append(metrics, trimmed)
		}
	}
	return metrics
}
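A plausible caller of this helper pair; note that the file name default_metrics.json is fixed inside LoadDefaultMetricsConfig, and the loop below is an assumed usage, not code from this commit:

package main

import (
	"fmt"
	"log"

	"github.com/ClusterCockpit/cc-backend/internal/config"
)

func main() {
	cfg, err := config.LoadDefaultMetricsConfig()
	if err != nil {
		log.Fatal(err)
	}
	if cfg == nil {
		fmt.Println("no default_metrics.json present") // nil, nil signals an absent file
		return
	}
	for _, c := range cfg.Clusters {
		// "flops_any, mem_bw,mem_used" -> [flops_any mem_bw mem_used]
		fmt.Println(c.Name, config.ParseMetricsString(c.DefaultMetrics))
	}
}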
File diff suppressed because it is too large
@ -16,11 +16,23 @@ type Count struct {
Count int `json:"count"`
}

type EnergyFootprintValue struct {
Hardware string `json:"hardware"`
Metric string `json:"metric"`
Value float64 `json:"value"`
}

type FloatRange struct {
From float64 `json:"from"`
To float64 `json:"to"`
}

type FootprintValue struct {
Name string `json:"name"`
Stat string `json:"stat"`
Value float64 `json:"value"`
}

type Footprints struct {
TimeWeights *TimeWeights `json:"timeWeights"`
Metrics []*MetricFootprints `json:"metrics"`
@ -38,6 +50,7 @@ type IntRangeOutput struct {

type JobFilter struct {
Tags []string `json:"tags,omitempty"`
DbID []string `json:"dbId,omitempty"`
JobID *StringInput `json:"jobId,omitempty"`
ArrayJobID *int `json:"arrayJobId,omitempty"`
User *StringInput `json:"user,omitempty"`
@ -46,16 +59,14 @@ type JobFilter struct {
Cluster *StringInput `json:"cluster,omitempty"`
Partition *StringInput `json:"partition,omitempty"`
Duration *schema.IntRange `json:"duration,omitempty"`
Energy *FloatRange `json:"energy,omitempty"`
MinRunningFor *int `json:"minRunningFor,omitempty"`
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
StartTime *schema.TimeRange `json:"startTime,omitempty"`
State []schema.JobState `json:"state,omitempty"`
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg,omitempty"`
MemBwAvg *FloatRange `json:"memBwAvg,omitempty"`
LoadAvg *FloatRange `json:"loadAvg,omitempty"`
MemUsedMax *FloatRange `json:"memUsedMax,omitempty"`
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
Exclusive *int `json:"exclusive,omitempty"`
Node *StringInput `json:"node,omitempty"`
}
@ -85,6 +96,19 @@ type JobResultList struct {
HasNextPage *bool `json:"hasNextPage,omitempty"`
}

type JobStats struct {
ID int `json:"id"`
JobID string `json:"jobId"`
StartTime int `json:"startTime"`
Duration int `json:"duration"`
Cluster string `json:"cluster"`
SubCluster string `json:"subCluster"`
NumNodes int `json:"numNodes"`
NumHWThreads *int `json:"numHWThreads,omitempty"`
NumAccelerators *int `json:"numAccelerators,omitempty"`
Stats []*NamedStats `json:"stats"`
}

type JobsStatistics struct {
ID string `json:"id"`
Name string `json:"name"`
@ -120,20 +144,47 @@ type MetricHistoPoint struct {
type MetricHistoPoints struct {
Metric string `json:"metric"`
Unit string `json:"unit"`
Stat *string `json:"stat,omitempty"`
Data []*MetricHistoPoint `json:"data,omitempty"`
}

type MetricStatItem struct {
MetricName string `json:"metricName"`
Range *FloatRange `json:"range"`
}

type Mutation struct {
}

type NamedStats struct {
Name string `json:"name"`
Data *schema.MetricStatistics `json:"data"`
}

type NamedStatsWithScope struct {
Name string `json:"name"`
Scope schema.MetricScope `json:"scope"`
Stats []*ScopedStats `json:"stats"`
}

type NodeMetrics struct {
Host string `json:"host"`
SubCluster string `json:"subCluster"`
Metrics []*JobMetricWithName `json:"metrics"`
}

type NodesResultList struct {
Items []*NodeMetrics `json:"items"`
Offset *int `json:"offset,omitempty"`
Limit *int `json:"limit,omitempty"`
Count *int `json:"count,omitempty"`
TotalNodes *int `json:"totalNodes,omitempty"`
HasNextPage *bool `json:"hasNextPage,omitempty"`
}

type OrderByInput struct {
Field string `json:"field"`
Type string `json:"type"`
Order SortDirectionEnum `json:"order"`
}

@ -142,7 +193,10 @@ type PageRequest struct {
Page int `json:"page"`
}

type Query struct {
type ScopedStats struct {
Hostname string `json:"hostname"`
ID *string `json:"id,omitempty"`
Data *schema.MetricStatistics `json:"data"`
}

type StringInput struct {
@ -155,8 +209,9 @@ type StringInput struct {
}

type TimeRangeOutput struct {
From time.Time `json:"from"`
To time.Time `json:"to"`
Range *string `json:"range,omitempty"`
From time.Time `json:"from"`
To time.Time `json:"to"`
}

type TimeWeights struct {
@ -197,7 +252,7 @@ func (e Aggregate) String() string {
return string(e)
}

func (e *Aggregate) UnmarshalGQL(v interface{}) error {
func (e *Aggregate) UnmarshalGQL(v any) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
@ -250,7 +305,7 @@ func (e SortByAggregate) String() string {
return string(e)
}

func (e *SortByAggregate) UnmarshalGQL(v interface{}) error {
func (e *SortByAggregate) UnmarshalGQL(v any) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
@ -291,7 +346,7 @@ func (e SortDirectionEnum) String() string {
return string(e)
}

func (e *SortDirectionEnum) UnmarshalGQL(v interface{}) error {
func (e *SortDirectionEnum) UnmarshalGQL(v any) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
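The only change to these UnmarshalGQL signatures is interface{} becoming any. Since Go 1.18, any is a built-in alias for interface{}, so the rename is purely cosmetic:

package main

import "fmt"

func main() {
	var a any = "hello"   // any and interface{} name the same type,
	var b interface{} = a // so values move between them freely
	fmt.Println(a == b)   // true
}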
@ -1,15 +1,39 @@
package graph

import (
"sync"

"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/jmoiron/sqlx"
)

// This file will not be regenerated automatically.
//
// It serves as dependency injection for your app, add any dependencies you require here.
var (
initOnce sync.Once
resolverInstance *Resolver
)

type Resolver struct {
DB *sqlx.DB
Repo *repository.JobRepository
}

func Init() {
initOnce.Do(func() {
db := repository.GetConnection()
resolverInstance = &Resolver{
DB: db.DB, Repo: repository.GetJobRepository(),
}
})
}

func GetResolverInstance() *Resolver {
if resolverInstance == nil {
log.Fatal("GraphQL resolver not initialized!")
}

return resolverInstance
}
|
@ -2,19 +2,22 @@ package graph
|
||||
|
||||
// This file will be automatically regenerated based on the schema, any resolver implementations
|
||||
// will be copied through when generating and any unknown code will be moved to the end.
|
||||
// Code generated by github.com/99designs/gqlgen version v0.17.45
|
||||
// Code generated by github.com/99designs/gqlgen version v0.17.66
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
@ -28,15 +31,12 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
|
||||
|
||||
// Tags is the resolver for the tags field.
|
||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||
return r.Repo.GetTags(&obj.ID)
|
||||
return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
|
||||
}
|
||||
|
||||
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
||||
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
|
||||
if obj.State == schema.JobStateRunning {
|
||||
obj.Duration = int32(time.Now().Unix() - obj.StartTimeUnix)
|
||||
}
|
||||
|
||||
// FIXME: Make the hardcoded duration configurable
|
||||
if obj.Exclusive != 1 && obj.Duration > 600 {
|
||||
return r.Repo.FindConcurrentJobs(ctx, obj)
|
||||
}
|
||||
@ -44,8 +44,72 @@ func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*mod
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Footprint is the resolver for the footprint field.
|
||||
func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.FootprintValue, error) {
|
||||
rawFootprint, err := r.Repo.FetchFootprint(obj)
|
||||
if err != nil {
|
||||
log.Warn("Error while fetching job footprint data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := []*model.FootprintValue{}
|
||||
for name, value := range rawFootprint {
|
||||
|
||||
parts := strings.Split(name, "_")
|
||||
statPart := parts[len(parts)-1]
|
||||
nameParts := parts[:len(parts)-1]
|
||||
|
||||
res = append(res, &model.FootprintValue{
|
||||
Name: strings.Join(nameParts, "_"),
|
||||
Stat: statPart,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
|
||||
return res, err
|
||||
}
|
||||
|
||||
// EnergyFootprint is the resolver for the energyFootprint field.
|
||||
func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*model.EnergyFootprintValue, error) {
|
||||
rawEnergyFootprint, err := r.Repo.FetchEnergyFootprint(obj)
|
||||
if err != nil {
|
||||
log.Warn("Error while fetching job energy footprint data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := []*model.EnergyFootprintValue{}
|
||||
for name, value := range rawEnergyFootprint {
|
||||
// Suboptimal: Nearly hardcoded metric name expectations
|
||||
matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
|
||||
matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
|
||||
matchMem := regexp.MustCompile(`mem|Mem|MEM`)
|
||||
matchCore := regexp.MustCompile(`core|Core|CORE`)
|
||||
|
||||
hwType := ""
|
||||
switch test := name; { // NOtice ';' for var declaration
|
||||
case matchCpu.MatchString(test):
|
||||
hwType = "CPU"
|
||||
case matchAcc.MatchString(test):
|
||||
hwType = "Accelerator"
|
||||
case matchMem.MatchString(test):
|
||||
hwType = "Memory"
|
||||
case matchCore.MatchString(test):
|
||||
hwType = "Core"
|
||||
default:
|
||||
hwType = "Other"
|
||||
}
|
||||
|
||||
res = append(res, &model.EnergyFootprintValue{
|
||||
Hardware: hwType,
|
||||
Metric: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
return res, err
|
||||
}
|
||||
|
||||
// MetaData is the resolver for the metaData field.
|
||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
|
||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (any, error) {
|
||||
return r.Repo.FetchMetadata(obj)
|
||||
}
|
||||
|
||||
@ -54,24 +118,48 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
|
||||
return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
|
||||
}
|
||||
|
||||
// Name is the resolver for the name field.
|
||||
func (r *metricValueResolver) Name(ctx context.Context, obj *schema.MetricValue) (*string, error) {
|
||||
panic(fmt.Errorf("not implemented: Name - name"))
|
||||
}
|
||||
|
||||
// CreateTag is the resolver for the createTag field.
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
|
||||
id, err := r.Repo.CreateTag(typeArg, name)
|
||||
if err != nil {
|
||||
log.Warn("Error while creating tag")
|
||||
return nil, err
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string, scope string) (*schema.Tag, error) {
|
||||
user := repository.GetUserFromContext(ctx)
|
||||
if user == nil {
|
||||
return nil, fmt.Errorf("no user in context")
|
||||
}
|
||||
|
||||
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
|
||||
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
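    // As a matrix (a sketch derived from the condition below; a "private" tag
    // is one whose scope equals the requesting username):
    //   scope "admin"      -> requires RoleAdmin
    //   scope "global"     -> requires RoleAdmin or RoleSupport
    //   scope == username  -> any authenticated user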
    if user.HasRole(schema.RoleAdmin) && scope == "admin" ||
        user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && scope == "global" ||
        user.Username == scope {
        // Create in DB
        id, err := r.Repo.CreateTag(typeArg, name, scope)
        if err != nil {
            log.Warn("Error while creating tag")
            return nil, err
        }
        return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
    } else {
        log.Warnf("Not authorized to create tag with scope: %s", scope)
        return nil, fmt.Errorf("Not authorized to create tag with scope: %s", scope)
    }
}

// DeleteTag is the resolver for the deleteTag field.
func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
    // This uses ID string <-> ID string; removeTagFromList uses []string <-> []int
    panic(fmt.Errorf("not implemented: DeleteTag - deleteTag"))
}

// AddTagsToJob is the resolver for the addTagsToJob field.
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
    user := repository.GetUserFromContext(ctx)
    if user == nil {
        return nil, fmt.Errorf("no user in context")
    }

    jid, err := strconv.ParseInt(job, 10, 64)
    if err != nil {
        log.Warn("Error while adding tag to job")
@@ -80,15 +168,32 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds

    tags := []*schema.Tag{}
    for _, tagId := range tagIds {
        // Get ID
        tid, err := strconv.ParseInt(tagId, 10, 64)
        if err != nil {
            log.Warn("Error while parsing tag id")
            return nil, err
        }

        if tags, err = r.Repo.AddTag(jid, tid); err != nil {
            log.Warn("Error while adding tag")
            return nil, err
        // Test Exists
        _, _, tscope, exists := r.Repo.TagInfo(tid)
        if !exists {
            log.Warnf("Tag does not exist (ID): %d", tid)
            return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
        }

        // Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
        if user.HasRole(schema.RoleAdmin) && tscope == "admin" ||
            user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && tscope == "global" ||
            user.Username == tscope {
            // Add to Job
            if tags, err = r.Repo.AddTag(user, jid, tid); err != nil {
                log.Warn("Error while adding tag")
                return nil, err
            }
        } else {
            log.Warnf("Not authorized to add tag: %d", tid)
            return nil, fmt.Errorf("Not authorized to add tag: %d", tid)
        }
    }

@@ -97,6 +202,11 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds

// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
    user := repository.GetUserFromContext(ctx)
    if user == nil {
        return nil, fmt.Errorf("no user in context")
    }

    jid, err := strconv.ParseInt(job, 10, 64)
    if err != nil {
        log.Warn("Error while parsing job id")
@@ -105,21 +215,80 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta

    tags := []*schema.Tag{}
    for _, tagId := range tagIds {
        // Get ID
        tid, err := strconv.ParseInt(tagId, 10, 64)
        if err != nil {
            log.Warn("Error while parsing tag id")
            return nil, err
        }

        if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
            log.Warn("Error while removing tag")
            return nil, err
        // Test Exists
        _, _, tscope, exists := r.Repo.TagInfo(tid)
        if !exists {
            log.Warnf("Tag does not exist (ID): %d", tid)
            return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
        }

        // Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
        if user.HasRole(schema.RoleAdmin) && tscope == "admin" ||
            user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && tscope == "global" ||
            user.Username == tscope {
            // Remove from Job
            if tags, err = r.Repo.RemoveTag(user, jid, tid); err != nil {
                log.Warn("Error while removing tag")
                return nil, err
            }
        } else {
            log.Warnf("Not authorized to remove tag: %d", tid)
            return nil, fmt.Errorf("Not authorized to remove tag: %d", tid)
        }

    }

    return tags, nil
}

// RemoveTagFromList is the resolver for the removeTagFromList field.
func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []string) ([]int, error) {
    // Needs context user
    user := repository.GetUserFromContext(ctx)
    if user == nil {
        return nil, fmt.Errorf("no user in context")
    }

    tags := []int{}
    for _, tagId := range tagIds {
        // Get ID
        tid, err := strconv.ParseInt(tagId, 10, 64)
        if err != nil {
            log.Warn("Error while parsing tag id for removal")
            return nil, err
        }

        // Test Exists
        _, _, tscope, exists := r.Repo.TagInfo(tid)
        if !exists {
            log.Warnf("Tag does not exist (ID): %d", tid)
            return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
        }

        // Test Access: Admins && Admin Tag OR Everyone && Private Tag
        if user.HasRole(schema.RoleAdmin) && (tscope == "global" || tscope == "admin") || user.Username == tscope {
            // Remove from DB
            if err = r.Repo.RemoveTagById(tid); err != nil {
                log.Warn("Error while removing tag")
                return nil, err
            } else {
                tags = append(tags, int(tid))
            }
        } else {
            log.Warnf("Not authorized to remove tag: %d", tid)
            return nil, fmt.Errorf("Not authorized to remove tag: %d", tid)
        }
    }
    return tags, nil
}

// UpdateConfiguration is the resolver for the updateConfiguration field.
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
    if err := repository.GetUserCfgRepo().UpdateConfig(name, value, repository.GetUserFromContext(ctx)); err != nil {
@@ -137,7 +306,12 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error)

// Tags is the resolver for the tags field.
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
    return r.Repo.GetTags(nil)
    return r.Repo.GetTags(repository.GetUserFromContext(ctx), nil)
}

// GlobalMetrics is the resolver for the globalMetrics field.
func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
    return archive.GlobalMetricList, nil
}

// User is the resolver for the user field.
@@ -172,7 +346,7 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
        return nil, err
    }

    job, err := r.Repo.FindById(numericId)
    job, err := r.Repo.FindById(ctx, numericId)
    if err != nil {
        log.Warn("Error while finding job by id")
        return nil, err
@@ -188,14 +362,24 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
}

// JobMetrics is the resolver for the jobMetrics field.
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
    if resolution == nil { // Load from Config
        if config.Keys.EnableResampling != nil {
            defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
            resolution = &defaultRes
        } else { // Set 0 (Loads configured metric timestep)
            defaultRes := 0
            resolution = &defaultRes
        }
    }
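    // A minimal sketch of the matching config entry (the key name and the
    // values here are illustrative assumptions, not taken from this diff):
    //
    //   "enable-resampling": {
    //     "trigger": 30,
    //     "resolutions": [600, 300, 120, 60]
    //   }
    //
    // With such a list, slices.Max would pick 600 as the coarsest default.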

    job, err := r.Query().Job(ctx, id)
    if err != nil {
        log.Warn("Error while querying job for metrics")
        return nil, err
    }

    data, err := metricdata.LoadData(job, metrics, scopes, ctx)
    data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
    if err != nil {
        log.Warn("Error while loading job data")
        return nil, err
@@ -215,9 +399,67 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
    return res, err
}

// JobsFootprints is the resolver for the jobsFootprints field.
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
    return r.jobsFootprints(ctx, filter, metrics)
// JobStats is the resolver for the jobStats field.
func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error) {
    job, err := r.Query().Job(ctx, id)
    if err != nil {
        log.Warnf("Error while querying job %s for metadata", id)
        return nil, err
    }

    data, err := metricDataDispatcher.LoadJobStats(job, metrics, ctx)
    if err != nil {
        log.Warnf("Error while loading jobStats data for job id %s", id)
        return nil, err
    }

    res := []*model.NamedStats{}
    for name, md := range data {
        res = append(res, &model.NamedStats{
            Name: name,
            Data: &md,
        })
    }

    return res, err
}

// ScopedJobStats is the resolver for the scopedJobStats field.
func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.NamedStatsWithScope, error) {
    job, err := r.Query().Job(ctx, id)
    if err != nil {
        log.Warnf("Error while querying job %s for metadata", id)
        return nil, err
    }

    data, err := metricDataDispatcher.LoadScopedJobStats(job, metrics, scopes, ctx)
    if err != nil {
        log.Warnf("Error while loading scopedJobStats data for job id %s", id)
        return nil, err
    }

    res := make([]*model.NamedStatsWithScope, 0)
    for name, scoped := range data {
        for scope, stats := range scoped {

            mdlStats := make([]*model.ScopedStats, 0)
            for _, stat := range stats {
                mdlStats = append(mdlStats, &model.ScopedStats{
                    Hostname: stat.Hostname,
                    ID: stat.Id,
                    Data: stat.Data,
                })
            }

            res = append(res, &model.NamedStatsWithScope{
                Name: name,
                Scope: scope,
                Stats: mdlStats,
            })
        }
    }

    return res, nil
}

// Jobs is the resolver for the jobs field.
@@ -241,30 +483,39 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
        return nil, err
    }

    if !config.Keys.UiDefaults["job_list_usePaging"].(bool) {
        hasNextPage := false
        page.Page += 1

        nextJobs, err := r.Repo.QueryJobs(ctx, filter, page, order)
        if err != nil {
            log.Warn("Error while querying next jobs")
            return nil, err
        }
        if len(nextJobs) > 0 {
            hasNextPage = true
        }

        return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
    } else {
        return &model.JobResultList{Items: jobs, Count: &count}, nil
    // Note: Even if the app default 'config.Keys.UiDefaults["job_list_usePaging"]' is set, always return the hasNextPage boolean.
    // Users can decide in the frontend to use continuous scroll, even if the app default is paging!
    /*
        Example: Page 4 @ 10 items per page. Does item 41 exist?
        Minimal check: Page 41 @ 1 item per page. If len(result) is 1, page 5 @ 10 items per page exists.
    */
    nextPage := &model.PageRequest{
        ItemsPerPage: 1,
        Page: ((page.Page * page.ItemsPerPage) + 1),
    }
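    // Worked example (hypothetical numbers): after page 4 @ 10 items per page,
    // the probe requests page (4*10)+1 = 41 at 1 item per page, i.e. offset 40.
    // Exactly one returned row proves that item 41, and thus page 5, exists.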
    nextJobs, err := r.Repo.QueryJobs(ctx, filter, nextPage, order)
    if err != nil {
        log.Warn("Error while querying next jobs")
        return nil, err
    }

    hasNextPage := false
    if len(nextJobs) == 1 {
        hasNextPage = true
    }

    return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
}

// JobsStatistics is the resolver for the jobsStatistics field.
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) ([]*model.JobsStatistics, error) {
    var err error
    var stats []*model.JobsStatistics

    // Top Level Defaults
    var defaultDurationBins string = "1h"
    var defaultMetricBins int = 10

    if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
        requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
        if groupBy == nil {
@@ -298,8 +549,13 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
    }

    if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {

        if numDurationBins == nil {
            numDurationBins = &defaultDurationBins
        }

        if groupBy == nil {
            stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0])
            stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0], numDurationBins)
            if err != nil {
                return nil, err
            }
@@ -309,8 +565,13 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
    }

    if requireField(ctx, "histMetrics") {

        if numMetricBins == nil {
            numMetricBins = &defaultMetricBins
        }

        if groupBy == nil {
            stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0])
            stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0], numMetricBins)
            if err != nil {
                return nil, err
            }
@@ -322,6 +583,62 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
    return stats, nil
}

// JobsMetricStats is the resolver for the jobsMetricStats field.
func (r *queryResolver) JobsMetricStats(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.JobStats, error) {
    // No paging; fixed order by StartTime ASC
    order := &model.OrderByInput{
        Field: "startTime",
        Type: "col",
        Order: "ASC",
    }

    jobs, err := r.Repo.QueryJobs(ctx, filter, nil, order)
    if err != nil {
        log.Warn("Error while querying jobs for comparison")
        return nil, err
    }

    res := []*model.JobStats{}
    for _, job := range jobs {
        data, err := metricDataDispatcher.LoadJobStats(job, metrics, ctx)
        if err != nil {
            log.Warnf("Error while loading comparison jobStats data for job id %d", job.JobID)
            continue
            // return nil, err
        }

        sres := []*model.NamedStats{}
        for name, md := range data {
            sres = append(sres, &model.NamedStats{
                Name: name,
                Data: &md,
            })
        }

        numThreadsInt := int(job.NumHWThreads)
        numAccsInt := int(job.NumAcc)
        res = append(res, &model.JobStats{
            ID: int(job.ID),
            JobID: strconv.Itoa(int(job.JobID)),
            StartTime: int(job.StartTime.Unix()),
            Duration: int(job.Duration),
            Cluster: job.Cluster,
            SubCluster: job.SubCluster,
            NumNodes: int(job.NumNodes),
            NumHWThreads: &numThreadsInt,
            NumAccelerators: &numAccsInt,
            Stats: sres,
        })
    }
    return res, err
}

// JobsFootprints is the resolver for the jobsFootprints field.
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
    // NOTE: Legacy naming! This resolver is for normalized histograms in the analysis view only - *not* related to the DB "footprint" column!
    return r.jobsFootprints(ctx, filter, metrics)
}

// RooflineHeatmap is the resolver for the rooflineHeatmap field.
func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
    return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
@@ -330,8 +647,8 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
// NodeMetrics is the resolver for the nodeMetrics field.
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
    user := repository.GetUserFromContext(ctx)
    if user != nil && !user.HasRole(schema.RoleAdmin) {
        return nil, errors.New("you need to be an administrator for this query")
    if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
        return nil, errors.New("you need to be administrator or support staff for this query")
    }

    if metrics == nil {
@@ -340,9 +657,9 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
        }
    }

    data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
    data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
    if err != nil {
        log.Warn("Error while loading node data")
        log.Warn("error while loading node data")
        return nil, err
    }

@@ -352,7 +669,10 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
            Host: hostname,
            Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
        }
        host.SubCluster, _ = archive.GetSubClusterByNode(cluster, hostname)
        host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
        if err != nil {
            log.Warnf("error in nodeMetrics resolver: %s", err)
        }

        for metric, scopedMetrics := range metrics {
            for _, scopedMetric := range scopedMetrics {
@@ -370,6 +690,68 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
    return nodeMetrics, nil
}

// NodeMetricsList is the resolver for the nodeMetricsList field.
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
    if resolution == nil { // Load from Config
        if config.Keys.EnableResampling != nil {
            defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
            resolution = &defaultRes
        } else { // Set 0 (Loads configured metric timestep)
            defaultRes := 0
            resolution = &defaultRes
        }
    }

    user := repository.GetUserFromContext(ctx)
    if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
        return nil, errors.New("you need to be administrator or support staff for this query")
    }

    if metrics == nil {
        for _, mc := range archive.GetCluster(cluster).MetricConfig {
            metrics = append(metrics, mc.Name)
        }
    }

    data, totalNodes, hasNextPage, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, *resolution, from, to, page, ctx)
    if err != nil {
        log.Warn("error while loading node data")
        return nil, err
    }

    nodeMetricsList := make([]*model.NodeMetrics, 0, len(data))
    for hostname, metrics := range data {
        host := &model.NodeMetrics{
            Host: hostname,
            Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
        }
        host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
        if err != nil {
            log.Warnf("error in nodeMetrics resolver: %s", err)
        }

        for metric, scopedMetrics := range metrics {
            for scope, scopedMetric := range scopedMetrics {
                host.Metrics = append(host.Metrics, &model.JobMetricWithName{
                    Name: metric,
                    Scope: scope,
                    Metric: scopedMetric,
                })
            }
        }

        nodeMetricsList = append(nodeMetricsList, host)
    }

    nodeMetricsListResult := &model.NodesResultList{
        Items: nodeMetricsList,
        TotalNodes: &totalNodes,
        HasNextPage: &hasNextPage,
    }

    return nodeMetricsListResult, nil
}

// NumberOfNodes is the resolver for the numberOfNodes field.
func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
    nodeList, err := archive.ParseNodeList(obj.Nodes)
@@ -385,6 +767,9 @@ func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver
// Job returns generated.JobResolver implementation.
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }

// MetricValue returns generated.MetricValueResolver implementation.
func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricValueResolver{r} }

// Mutation returns generated.MutationResolver implementation.
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }

@@ -396,6 +781,7 @@ func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subCluste

type clusterResolver struct{ *Resolver }
type jobResolver struct{ *Resolver }
type metricValueResolver struct{ *Resolver }
type mutationResolver struct{ *Resolver }
type queryResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver }

@@ -11,7 +11,7 @@ import (

    "github.com/99designs/gqlgen/graphql"
    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
    "github.com/ClusterCockpit/cc-backend/internal/metricdata"
    "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
    // "github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -24,8 +24,8 @@ func (r *queryResolver) rooflineHeatmap(
    ctx context.Context,
    filter []*model.JobFilter,
    rows int, cols int,
    minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {

    minX float64, minY float64, maxX float64, maxY float64,
) ([][]float64, error) {
    jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
    if err != nil {
        log.Error("Error while querying jobs for roofline")
@@ -47,7 +47,14 @@ func (r *queryResolver) rooflineHeatmap(
        continue
    }

    jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
    // metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
    // resolution := 0

    // for _, mc := range metricConfigs {
    //     resolution = max(resolution, mc.Timestep)
    // }

    jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
    if err != nil {
        log.Errorf("Error while loading roofline metrics for job %d", job.ID)
        return nil, err
@@ -120,7 +127,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
        continue
    }

    if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
    if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
        log.Error("Error while loading averages for footprint")
        return nil, err
    }

@@ -8,9 +8,9 @@ import (
    "bytes"
    "encoding/json"
    "fmt"
    "math"
    "os"
    "strings"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-backend/internal/repository"
@@ -42,8 +42,8 @@ func HandleImportFlag(flag string) error {
        }
        dec := json.NewDecoder(bytes.NewReader(raw))
        dec.DisallowUnknownFields()
        jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
        if err = dec.Decode(&jobMeta); err != nil {
        job := schema.JobMeta{BaseJob: schema.JobDefaults}
        if err = dec.Decode(&job); err != nil {
            log.Warn("Error while decoding raw json metadata for import")
            return err
        }
@@ -67,32 +67,68 @@ func HandleImportFlag(flag string) error {
            return err
        }

        // checkJobData(&jobData)
        job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful

        jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful

        // if _, err = r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
        //     if err != nil {
        //         log.Warn("Error while finding job in jobRepository")
        //         return err
        //     }
        //
        //     return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime already exists")
        // }
        //
        job := schema.Job{
            BaseJob: jobMeta.BaseJob,
            StartTime: time.Unix(jobMeta.StartTime, 0),
            StartTimeUnix: jobMeta.StartTime,
        sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
        if err != nil {
            log.Errorf("cannot get subcluster: %s", err.Error())
            return err
        }

        // TODO: Other metrics...
        job.LoadAvg = loadJobStat(&jobMeta, "cpu_load")
        job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
        job.MemUsedMax = loadJobStat(&jobMeta, "mem_used")
        job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
        job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
        job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
        job.Footprint = make(map[string]float64)

        for _, fp := range sc.Footprint {
            statType := "avg"

            if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
                statType = sc.MetricConfig[i].Footprint
            }

            name := fmt.Sprintf("%s_%s", fp, statType)

            job.Footprint[name] = repository.LoadJobStat(&job, fp, statType)
        }

        job.RawFootprint, err = json.Marshal(job.Footprint)
        if err != nil {
            log.Warn("Error while marshaling job footprint")
            return err
        }

        job.EnergyFootprint = make(map[string]float64)

        // Total Job Energy Outside Loop
        totalEnergy := 0.0
        for _, fp := range sc.EnergyFootprint {
            // Always Init Metric Energy Inside Loop
            metricEnergy := 0.0
            if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
                // Note: For DB data, calculate and save as kWh
                if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
                    log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, fp, job.Cluster)
                    // FIXME: Needs sum as stats type
                } else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
                    // Energy: Power (in Watts) * Time (in Seconds)
                    // Unit: (W * (s / 3600)) / 1000 = kWh
                    // Round 2 Digits: round(Energy * 100) / 100
                    // Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
                    // Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
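                    // Worked example (hypothetical numbers): a 2-node job averaging
                    // 350 W per node for 5400 s yields (350 * 2) * (5400 / 3600) / 1000 = 1.05 kWh.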
                    rawEnergy := ((repository.LoadJobStat(&job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
                    metricEnergy = math.Round(rawEnergy*100.0) / 100.0
                }
            } else {
                log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
            }

            job.EnergyFootprint[fp] = metricEnergy
            totalEnergy += metricEnergy
        }

        job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
        if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
            log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
            return err
        }

        job.RawResources, err = json.Marshal(job.Resources)
        if err != nil {
@@ -110,7 +146,7 @@ func HandleImportFlag(flag string) error {
            return err
        }

        if err = archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
        if err = archive.GetHandle().ImportJob(&job, &jobData); err != nil {
            log.Error("Error while importing job")
            return err
        }
@@ -122,8 +158,8 @@ func HandleImportFlag(flag string) error {
        }

        for _, tag := range job.Tags {
            if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
                log.Error("Error while adding or creating tag")
            if err := r.ImportTag(id, tag.Type, tag.Name, tag.Scope); err != nil {
                log.Error("Error while adding or creating tag on import")
                return err
            }
        }

@@ -45,6 +45,9 @@ func setup(t *testing.T) *repository.JobRepository {
    "jwts": {
        "max-age": "2m"
    },
    "apiAllowedIPs": [
        "*"
    ],
    "clusters": [
        {
            "name": "testcluster",
@@ -82,7 +85,7 @@ func setup(t *testing.T) *repository.JobRepository {
    if err := os.Mkdir(jobarchive, 0777); err != nil {
        t.Fatal(err)
    }
    if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
    if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
        t.Fatal(err)
    }
    fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")

@@ -7,6 +7,7 @@ package importer
import (
    "encoding/json"
    "fmt"
    "math"
    "strings"
    "time"

@@ -16,6 +17,11 @@ import (
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
)

const (
    addTagQuery = "INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)"
    setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
)

// Delete the tables "job", "tag" and "jobtag" from the database and
// repopulate them using the jobs found in `archive`.
func InitDB() error {
@@ -60,13 +66,66 @@ func InitDB() error {
            StartTimeUnix: jobMeta.StartTime,
        }

        // TODO: Other metrics...
        job.LoadAvg = loadJobStat(jobMeta, "cpu_load")
        job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
        job.MemUsedMax = loadJobStat(jobMeta, "mem_used")
        job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
        job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
        job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
        sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
        if err != nil {
            log.Errorf("cannot get subcluster: %s", err.Error())
            return err
        }

        job.Footprint = make(map[string]float64)

        for _, fp := range sc.Footprint {
            statType := "avg"

            if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
                statType = sc.MetricConfig[i].Footprint
            }

            name := fmt.Sprintf("%s_%s", fp, statType)

            job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
        }

        job.RawFootprint, err = json.Marshal(job.Footprint)
        if err != nil {
            log.Warn("Error while marshaling job footprint")
            return err
        }

        job.EnergyFootprint = make(map[string]float64)

        // Total Job Energy Outside Loop
        totalEnergy := 0.0
        for _, fp := range sc.EnergyFootprint {
            // Always Init Metric Energy Inside Loop
            metricEnergy := 0.0
            if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
                // Note: For DB data, calculate and save as kWh
                if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
                    log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, fp, jobMeta.Cluster)
                    // FIXME: Needs sum as stats type
                } else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
                    // Energy: Power (in Watts) * Time (in Seconds)
                    // Unit: (W * (s / 3600)) / 1000 = kWh
                    // Round 2 Digits: round(Energy * 100) / 100
                    // Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
                    // Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
                    rawEnergy := ((repository.LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
                    metricEnergy = math.Round(rawEnergy*100.0) / 100.0
                }
            } else {
                log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
            }

            job.EnergyFootprint[fp] = metricEnergy
            totalEnergy += metricEnergy
        }

        job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
        if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
            log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
            return err
        }

        job.RawResources, err = json.Marshal(job.Resources)
        if err != nil {
@@ -88,7 +147,8 @@ func InitDB() error {
            continue
        }

        id, err := r.TransactionAdd(t, job)
        id, err := r.TransactionAddNamed(t,
            repository.NamedJobInsert, job)
        if err != nil {
            log.Errorf("repository initDB(): %v", err)
            errorOccured++
@@ -99,7 +159,9 @@ func InitDB() error {
            tagstr := tag.Name + ":" + tag.Type
            tagId, ok := tags[tagstr]
            if !ok {
                tagId, err = r.TransactionAddTag(t, tag)
                tagId, err = r.TransactionAdd(t,
                    addTagQuery,
                    tag.Name, tag.Type)
                if err != nil {
                    log.Errorf("Error adding tag: %v", err)
                    errorOccured++
@@ -108,7 +170,9 @@ func InitDB() error {
                tags[tagstr] = tagId
            }

            r.TransactionSetTag(t, id, tagId)
            r.TransactionAdd(t,
                setTagQuery,
                id, tagId)
        }

        if err == nil {
@@ -150,18 +214,6 @@ func SanityChecks(job *schema.BaseJob) error {
    return nil
}

func loadJobStat(job *schema.JobMeta, metric string) float64 {
    if stats, ok := job.Statistics[metric]; ok {
        if metric == "mem_used" {
            return stats.Max
        } else {
            return stats.Avg
        }
    }

    return 0.0
}

func checkJobData(d *schema.JobData) error {
    for _, scopes := range *d {
        // var newUnit schema.Unit
internal/importer/testdata/cluster-fritz.json (vendored, 1486 changes): File diff suppressed because it is too large

internal/metricDataDispatcher/dataLoader.go (new file, 383 lines):
@@ -0,0 +1,383 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricDataDispatcher

import (
    "context"
    "fmt"
    "math"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
    "github.com/ClusterCockpit/cc-backend/internal/metricdata"
    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
    "github.com/ClusterCockpit/cc-backend/pkg/resampler"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
)

var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)

func cacheKey(
    job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    resolution int,
) string {
    // Duration and StartTime do not need to be in the cache key, as StartTime is less unique than
    // job.ID, and the TTL of the cache entry makes sure it does not stay there forever.
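    // For example (illustrative values only): a running job with DB ID 1337,
    // metrics [flops_any] and scopes [node] at resolution 600 would render as
    // "1337(running):[[flops_any]],[[node]]-600".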
    return fmt.Sprintf("%d(%s):[%v],[%v]-%d",
        job.ID, job.State, metrics, scopes, resolution)
}

// Fetches the metric data for a job.
func LoadData(job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    ctx context.Context,
    resolution int,
) (schema.JobData, error) {
    data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ interface{}, ttl time.Duration, size int) {
        var jd schema.JobData
        var err error

        if job.State == schema.JobStateRunning ||
            job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
            config.Keys.DisableArchive {

            repo, err := metricdata.GetMetricDataRepo(job.Cluster)
            if err != nil {
                return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
            }

            if scopes == nil {
                scopes = append(scopes, schema.MetricScopeNode)
            }

            if metrics == nil {
                cluster := archive.GetCluster(job.Cluster)
                for _, mc := range cluster.MetricConfig {
                    metrics = append(metrics, mc.Name)
                }
            }

            jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution)
            if err != nil {
                if len(jd) != 0 {
                    log.Warnf("partial error: %s", err.Error())
                    // return err, 0, 0 // Reactivating will block archiving on one partial error
                } else {
                    log.Error("Error while loading job data from metric repository")
                    return err, 0, 0
                }
            }
            size = jd.Size()
        } else {
            var jd_temp schema.JobData
            jd_temp, err = archive.GetHandle().LoadJobData(job)
            if err != nil {
                log.Error("Error while loading job data from archive")
                return err, 0, 0
            }

            // Deep copy the cached archive hashmap
            jd = metricdata.DeepCopy(jd_temp)

            // Resampling for archived data.
            // Pass the resolution from the frontend here.
            for _, v := range jd {
                for _, v_ := range v {
                    timestep := 0
                    for i := 0; i < len(v_.Series); i += 1 {
                        v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
                        if err != nil {
                            return err, 0, 0
                        }
                    }
                    v_.Timestep = timestep
                }
            }

            // Avoid sending unrequested data to the client:
            if metrics != nil || scopes != nil {
                if metrics == nil {
                    metrics = make([]string, 0, len(jd))
                    for k := range jd {
                        metrics = append(metrics, k)
                    }
                }

                res := schema.JobData{}
                for _, metric := range metrics {
                    if perscope, ok := jd[metric]; ok {
                        if len(perscope) > 1 {
                            subset := make(map[schema.MetricScope]*schema.JobMetric)
                            for _, scope := range scopes {
                                if jm, ok := perscope[scope]; ok {
                                    subset[scope] = jm
                                }
                            }

                            if len(subset) > 0 {
                                perscope = subset
                            }
                        }

                        res[metric] = perscope
                    }
                }
                jd = res
            }
            size = jd.Size()
        }

        ttl = 5 * time.Hour
        if job.State == schema.JobStateRunning {
            ttl = 2 * time.Minute
        }

        // FIXME: Review: Is this really necessary or correct?
        // Note: Lines 147-170 formerly known as prepareJobData(jobData, scopes)
        // For /monitoring/job/<job> and some other places, flops_any and mem_bw need
        // to be available at the scope 'node'. If a job has a lot of nodes,
        // statisticsSeries should be available so that a min/median/max graph can be
        // used instead of a lot of single lines.
        // NOTE: New StatsSeries will always be calculated as 'min/median/max'.
        // Existing (archived) StatsSeries can be 'min/mean/max'!
        const maxSeriesSize int = 15
        for _, scopes := range jd {
            for _, jm := range scopes {
                if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
                    continue
                }

                jm.AddStatisticsSeries()
            }
        }

        nodeScopeRequested := false
        for _, scope := range scopes {
            if scope == schema.MetricScopeNode {
                nodeScopeRequested = true
            }
        }

        if nodeScopeRequested {
            jd.AddNodeScope("flops_any")
            jd.AddNodeScope("mem_bw")
        }

        // Round Resulting Stat Values
        jd.RoundMetricStats()

        return jd, ttl, size
    })

    if err, ok := data.(error); ok {
        log.Error("Error in returned dataset")
        return nil, err
    }

    return data.(schema.JobData), nil
}

// Used for the jobsFootprint GraphQL query. TODO: Rename/Generalize.
func LoadAverages(
    job *schema.Job,
    metrics []string,
    data [][]schema.Float,
    ctx context.Context,
) error {
    if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
        return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
    }

    repo, err := metricdata.GetMetricDataRepo(job.Cluster)
    if err != nil {
        return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
    }

    stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalization?
    if err != nil {
        log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
        return err
    }

    for i, m := range metrics {
        nodes, ok := stats[m]
        if !ok {
            data[i] = append(data[i], schema.NaN)
            continue
        }

        sum := 0.0
        for _, node := range nodes {
            sum += node.Avg
        }
        data[i] = append(data[i], schema.Float(sum))
    }

    return nil
}

// Used for statsTable in frontend: Return scoped statistics by metric.
func LoadScopedJobStats(
    job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    ctx context.Context,
) (schema.ScopedJobStats, error) {

    if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
        return archive.LoadScopedStatsFromArchive(job, metrics, scopes)
    }

    repo, err := metricdata.GetMetricDataRepo(job.Cluster)
    if err != nil {
        return nil, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster)
    }

    scopedStats, err := repo.LoadScopedStats(job, metrics, scopes, ctx)
    if err != nil {
        log.Errorf("error while loading scoped statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project)
        return nil, err
    }

    return scopedStats, nil
}

// Used for polar plots in frontend: Aggregates statistics for all nodes to single values for the job, per metric.
func LoadJobStats(
    job *schema.Job,
    metrics []string,
    ctx context.Context,
) (map[string]schema.MetricStatistics, error) {
    if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
        return archive.LoadStatsFromArchive(job, metrics)
    }

    data := make(map[string]schema.MetricStatistics, len(metrics))
    repo, err := metricdata.GetMetricDataRepo(job.Cluster)
    if err != nil {
        return data, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster)
    }

    stats, err := repo.LoadStats(job, metrics, ctx)
    if err != nil {
        log.Errorf("error while loading statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project)
        return data, err
    }

    for _, m := range metrics {
        sum, avg, min, max := 0.0, 0.0, 0.0, 0.0
        nodes, ok := stats[m]
        if !ok {
            data[m] = schema.MetricStatistics{Min: min, Avg: avg, Max: max}
            continue
        }

        for _, node := range nodes {
            sum += node.Avg
            min = math.Min(min, node.Min)
            max = math.Max(max, node.Max)
        }

        data[m] = schema.MetricStatistics{
            Avg: (math.Round((sum/float64(job.NumNodes))*100) / 100),
            Min: (math.Round(min*100) / 100),
            Max: (math.Round(max*100) / 100),
        }
    }
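    // e.g. (hypothetical numbers): two nodes with per-node averages 3.0 and 5.0
    // give sum = 8.0, hence Avg = round((8.0/2)*100)/100 = 4.0 for the job.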
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Used for the classic node/system view. Returns a map of nodes to a map of metrics.
|
||||
func LoadNodeData(
|
||||
cluster string,
|
||||
metrics, nodes []string,
|
||||
scopes []schema.MetricScope,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
repo, err := metricdata.GetMetricDataRepo(cluster)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading node data from metric repository")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func LoadNodeListData(
|
||||
cluster, subCluster, nodeFilter string,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
page *model.PageRequest,
|
||||
ctx context.Context,
|
||||
) (map[string]schema.JobData, int, bool, error) {
|
||||
repo, err := metricdata.GetMetricDataRepo(cluster)
|
||||
if err != nil {
|
||||
return nil, 0, false, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, totalNodes, hasNextPage, err := repo.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, resolution, from, to, page, ctx)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading node data from metric repository")
|
||||
return nil, totalNodes, hasNextPage, err
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: New StatsSeries will always be calculated as 'min/median/max'
|
||||
const maxSeriesSize int = 8
|
||||
for _, jd := range data {
|
||||
for _, scopes := range jd {
|
||||
for _, jm := range scopes {
|
||||
if jm.StatisticsSeries != nil || len(jm.Series) < maxSeriesSize {
|
||||
continue
|
||||
}
|
||||
jm.AddStatisticsSeries()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
return nil, totalNodes, hasNextPage, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||
}
|
||||
|
||||
return data, totalNodes, hasNextPage, nil
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@@ -10,9 +10,12 @@ import (
    "encoding/json"
    "errors"
    "fmt"
    "math"
    "sort"
    "strings"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -60,7 +63,10 @@ func (idb *InfluxDBv2DataRepository) LoadData(
    job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    ctx context.Context) (schema.JobData, error) {
    ctx context.Context,
    resolution int) (schema.JobData, error) {

    log.Infof("InfluxDB 2 Backend: Resolution Scaling not Implemented, will return default timestep. Requested Resolution %d", resolution)

    measurementsConds := make([]string, 0, len(metrics))
    for _, m := range metrics {
@@ -84,7 +90,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
        query := ""
        switch scope {
        case "node":
            // Get Finest Granularity, Group By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows
            // Get Finest Granularity, Group By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows <-- Resolution could be added here?
            // log.Info("Scope 'node' requested. ")
            query = fmt.Sprintf(`
                from(bucket: "%s")
@@ -114,6 +120,12 @@ func (idb *InfluxDBv2DataRepository) LoadData(
            // idb.bucket,
            // idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
            // measurementsCond, hostsCond)
        case "hwthread":
            log.Info(" Scope 'hwthread' requested, but not yet supported: Will return 'node' scope only. ")
            continue
        case "accelerator":
            log.Info(" Scope 'accelerator' requested, but not yet supported: Will return 'node' scope only. ")
            continue
        default:
            log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
            continue
@@ -171,6 +183,11 @@ func (idb *InfluxDBv2DataRepository) LoadData(
            }
        case "socket":
            continue
        case "accelerator":
            continue
        case "hwthread":
            // See below @ core
            continue
        case "core":
            continue
            // Include Series.Id in hostSeries
@@ -299,6 +316,53 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
    return stats, nil
}

// Used in Job-View StatsTable
// UNTESTED
func (idb *InfluxDBv2DataRepository) LoadScopedStats(
    job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    ctx context.Context) (schema.ScopedJobStats, error) {

    // Assumption: idb.loadData() only returns series node-scope - use node scope for statsTable
    scopedJobStats := make(schema.ScopedJobStats)
    data, err := idb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
    if err != nil {
        log.Warn("Error while loading job for scopedJobStats")
        return nil, err
    }

    for metric, metricData := range data {
        for _, scope := range scopes {
            if scope != schema.MetricScopeNode {
                logOnce.Do(func() {
                    log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
                })
                continue
            }

            if _, ok := scopedJobStats[metric]; !ok {
                scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
            }

            if _, ok := scopedJobStats[metric][scope]; !ok {
                scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
            }

            for _, series := range metricData[scope].Series {
                scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
                    Hostname: series.Hostname,
                    Data:     &series.Statistics,
                })
            }
        }
    }

    return scopedJobStats, nil
}

// Used in Systems-View @ Node-Overview
// UNTESTED
func (idb *InfluxDBv2DataRepository) LoadNodeData(
    cluster string,
    metrics, nodes []string,
@@ -306,8 +370,206 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
    from, to time.Time,
    ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {

    // TODO : Implement to be used in Analysis- and System/Node-View
    log.Infof("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes)
    // Note: scopes[] Array will be ignored, only return node scope

    return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
    // CONVERT ARGS TO INFLUX
    measurementsConds := make([]string, 0)
    for _, m := range metrics {
        measurementsConds = append(measurementsConds, fmt.Sprintf(`r["_measurement"] == "%s"`, m))
    }
    measurementsCond := strings.Join(measurementsConds, " or ")

    hostsConds := make([]string, 0)
    if nodes == nil {
        var allNodes []string
        subClusterNodeLists := archive.NodeLists[cluster]
        for _, nodeList := range subClusterNodeLists {
            // Accumulate into allNodes (not into the nil nodes slice), so nodes
            // from every subcluster list are kept
            allNodes = append(allNodes, nodeList.PrintList()...)
        }
        for _, node := range allNodes {
            nodes = append(nodes, node)
            hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
        }
    } else {
        for _, node := range nodes {
            hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
        }
    }
    hostsCond := strings.Join(hostsConds, " or ")

    // BUILD AND PERFORM QUERY
    query := fmt.Sprintf(`
        from(bucket: "%s")
            |> range(start: %s, stop: %s)
            |> filter(fn: (r) => (%s) and (%s) )
            |> drop(columns: ["_start", "_stop"])
            |> group(columns: ["hostname", "_measurement"])
            |> aggregateWindow(every: 60s, fn: mean)
            |> drop(columns: ["_time"])`,
        idb.bucket,
        idb.formatTime(from), idb.formatTime(to),
        measurementsCond, hostsCond)

    rows, err := idb.queryClient.Query(ctx, query)
    if err != nil {
        log.Error("Error while performing query")
        return nil, err
    }

    // HANDLE QUERY RETURN
    // Collect Float Arrays for Node@Metric -> No Scope Handling!
    influxData := make(map[string]map[string][]schema.Float)
    for rows.Next() {
        row := rows.Record()
        host, field := row.ValueByKey("hostname").(string), row.Measurement()

        influxHostData, ok := influxData[host]
        if !ok {
            influxHostData = make(map[string][]schema.Float)
            influxData[host] = influxHostData
        }

        influxFieldData, ok := influxData[host][field]
        if !ok {
            influxFieldData = make([]schema.Float, 0)
            influxData[host][field] = influxFieldData
        }

        val, ok := row.Value().(float64)
        if ok {
            influxData[host][field] = append(influxData[host][field], schema.Float(val))
        } else {
            influxData[host][field] = append(influxData[host][field], schema.Float(0))
        }
    }

    // BUILD FUNCTION RETURN
    data := make(map[string]map[string][]*schema.JobMetric)
    for node, metricData := range influxData {

        nodeData, ok := data[node]
        if !ok {
            nodeData = make(map[string][]*schema.JobMetric)
            data[node] = nodeData
        }

        for metric, floatArray := range metricData {
            // Start min/max at +/-MaxFloat64 so the first sample initializes them correctly
            avg, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64
            for _, val := range floatArray {
                avg += float64(val)
                min = math.Min(min, float64(val))
                max = math.Max(max, float64(val))
            }

            stats := schema.MetricStatistics{
                Avg: (math.Round((avg/float64(len(floatArray)))*100) / 100),
                Min: (math.Round(min*100) / 100),
                Max: (math.Round(max*100) / 100),
            }

            mc := archive.GetMetricConfig(cluster, metric)
            nodeData[metric] = append(nodeData[metric], &schema.JobMetric{
                Unit:     mc.Unit,
                Timestep: mc.Timestep,
                Series: []schema.Series{
                    {
                        Hostname:   node,
                        Statistics: stats,
                        Data:       floatArray,
                    },
                },
            })
        }
    }

    return data, nil
}

// Used in Systems-View @ Node-List
// UNTESTED
func (idb *InfluxDBv2DataRepository) LoadNodeListData(
    cluster, subCluster, nodeFilter string,
    metrics []string,
    scopes []schema.MetricScope,
    resolution int,
    from, to time.Time,
    page *model.PageRequest,
    ctx context.Context,
) (map[string]schema.JobData, int, bool, error) {

    // Assumption: idb.loadData() only returns series node-scope - use node scope for NodeList

    // 0) Init additional vars
    var totalNodes int = 0
    var hasNextPage bool = false

    // 1) Get list of all nodes
    var nodes []string
    if subCluster != "" {
        scNodes := archive.NodeLists[cluster][subCluster]
        nodes = scNodes.PrintList()
    } else {
        subClusterNodeLists := archive.NodeLists[cluster]
        for _, nodeList := range subClusterNodeLists {
            nodes = append(nodes, nodeList.PrintList()...)
        }
    }

    // 2) Filter nodes
    if nodeFilter != "" {
        filteredNodes := []string{}
        for _, node := range nodes {
            if strings.Contains(node, nodeFilter) {
                filteredNodes = append(filteredNodes, node)
            }
        }
        nodes = filteredNodes
    }

    // 2.1) Count total nodes && Sort nodes -> Sorting invalidated after return ...
    totalNodes = len(nodes)
    sort.Strings(nodes)

    // 3) Apply paging
    if len(nodes) > page.ItemsPerPage {
        start := (page.Page - 1) * page.ItemsPerPage
        end := start + page.ItemsPerPage
        if end > len(nodes) {
            end = len(nodes)
            hasNextPage = false
        } else {
            hasNextPage = true
        }
        nodes = nodes[start:end]
    }

    // 4) Fetch And Convert Data, use idb.LoadNodeData() for query

    rawNodeData, err := idb.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
    if err != nil {
        log.Error(fmt.Sprintf("Error while loading influx nodeData for nodeListData %#v\n", err))
        return nil, totalNodes, hasNextPage, err
    }

    data := make(map[string]schema.JobData)
    for node, nodeData := range rawNodeData {
        // Init Nested Map Data Structures If Not Found
        hostData, ok := data[node]
        if !ok {
            hostData = make(schema.JobData)
            data[node] = hostData
        }

        for metric, nodeMetricData := range nodeData {
            metricData, ok := hostData[metric]
            if !ok {
                metricData = make(map[schema.MetricScope]*schema.JobMetric)
                data[node][metric] = metricData
            }

            data[node][metric][schema.MetricScopeNode] = nodeMetricData[0] // Only Node Scope Returned from loadNodeData
        }
    }

    return data, totalNodes, hasNextPage, nil
}
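The paging in step 3 above is plain slice arithmetic; a self-contained sketch with fabricated node names shows the boundary behavior (page 2, two items per page, five nodes):

package main

import (
    "fmt"
    "sort"
)

func main() {
    nodes := []string{"n03", "n01", "n05", "n02", "n04"}
    sort.Strings(nodes)

    page, itemsPerPage := 2, 2
    hasNextPage := false
    if len(nodes) > itemsPerPage {
        start := (page - 1) * itemsPerPage
        end := start + itemsPerPage
        if end > len(nodes) {
            end = len(nodes) // clamp to the final partial page
        } else {
            hasNextPage = true
        }
        nodes = nodes[start:end]
    }
    fmt.Println(nodes, hasNextPage) // [n03 n04] true
}

As in the original, the sketch assumes a valid page number; a page beyond the end would slice out of range.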

@@ -8,13 +8,11 @@ import (
    "context"
    "encoding/json"
    "fmt"
    "math"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
)

@@ -24,21 +22,24 @@ type MetricDataRepository interface {
    Init(rawConfig json.RawMessage) error

    // Return the JobData for the given job, only with the requested metrics.
    LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
    LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)

    // Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
    // Return a map of metrics to a map of nodes to the metric statistics of the job. node scope only.
    LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)

    // Return a map of hosts to a map of metrics at the requested scopes for that node.
    // Return a map of metrics to a map of scopes to the scoped metric statistics of the job.
    LoadScopedStats(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.ScopedJobStats, error)

    // Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node.
    LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)

    // Return a map of hosts to a map of metrics to a map of scopes for multiple nodes.
    LoadNodeListData(cluster, subCluster, nodeFilter string, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, page *model.PageRequest, ctx context.Context) (map[string]schema.JobData, int, bool, error)
}

var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}

var useArchive bool

func Init(disableArchive bool) error {
    useArchive = !disableArchive
func Init() error {
    for _, cluster := range config.Keys.Clusters {
        if cluster.MetricDataRepository != nil {
            var kind struct {
@@ -73,284 +74,13 @@ func Init(disableArchive bool) error {
    return nil
}

var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)

// Fetches the metric data for a job.
func LoadData(job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    ctx context.Context,
) (schema.JobData, error) {
    data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
        var jd schema.JobData
        var err error

        if job.State == schema.JobStateRunning ||
            job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
            !useArchive {

            repo, ok := metricDataRepos[job.Cluster]

            if !ok {
                return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
            }

            if scopes == nil {
                scopes = append(scopes, schema.MetricScopeNode)
            }

            if metrics == nil {
                cluster := archive.GetCluster(job.Cluster)
                for _, mc := range cluster.MetricConfig {
                    metrics = append(metrics, mc.Name)
                }
            }

            jd, err = repo.LoadData(job, metrics, scopes, ctx)
            if err != nil {
                if len(jd) != 0 {
                    log.Errorf("partial error: %s", err.Error())
                    return err, 0, 0
                } else {
                    log.Error("Error while loading job data from metric repository")
                    return err, 0, 0
                }
            }
            size = jd.Size()
        } else {
            jd, err = archive.GetHandle().LoadJobData(job)
            if err != nil {
                log.Error("Error while loading job data from archive")
                return err, 0, 0
            }

            // Avoid sending unrequested data to the client:
            if metrics != nil || scopes != nil {
                if metrics == nil {
                    metrics = make([]string, 0, len(jd))
                    for k := range jd {
                        metrics = append(metrics, k)
                    }
                }

                res := schema.JobData{}
                for _, metric := range metrics {
                    if perscope, ok := jd[metric]; ok {
                        if len(perscope) > 1 {
                            subset := make(map[schema.MetricScope]*schema.JobMetric)
                            for _, scope := range scopes {
                                if jm, ok := perscope[scope]; ok {
                                    subset[scope] = jm
                                }
                            }

                            if len(subset) > 0 {
                                perscope = subset
                            }
                        }

                        res[metric] = perscope
                    }
                }
                jd = res
            }
            size = jd.Size()
        }

        ttl = 5 * time.Hour
        if job.State == schema.JobStateRunning {
            ttl = 2 * time.Minute
        }

        prepareJobData(job, jd, scopes)

        return jd, ttl, size
    })

    if err, ok := data.(error); ok {
        log.Error("Error in returned dataset")
        return nil, err
    }

    return data.(schema.JobData), nil
}

// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(
    job *schema.Job,
    metrics []string,
    data [][]schema.Float,
    ctx context.Context,
) error {
    if job.State != schema.JobStateRunning && useArchive {
        return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
    }

    repo, ok := metricDataRepos[job.Cluster]
    if !ok {
        return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
    }

    stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalization?
    if err != nil {
        log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
        return err
    }

    for i, m := range metrics {
        nodes, ok := stats[m]
        if !ok {
            data[i] = append(data[i], schema.NaN)
            continue
        }

        sum := 0.0
        for _, node := range nodes {
            sum += node.Avg
        }
        data[i] = append(data[i], schema.Float(sum))
    }

    return nil
}
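Note that the value appended per metric is the sum of the per-node averages, not their mean; for a two-node job with per-node flops_any averages of 12.5 and 7.5 the appended value is 20. A toy sketch of just that arithmetic (numbers are made up):

// Mirrors the summation loop above with fabricated per-node averages.
nodeAvgs := []float64{12.5, 7.5}
sum := 0.0
for _, avg := range nodeAvgs {
    sum += avg
}
fmt.Println(sum) // 20 — appended to data[i] as schema.Float(sum)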

// Used for the node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(
    cluster string,
    metrics, nodes []string,
    scopes []schema.MetricScope,
    from, to time.Time,
    ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
func GetMetricDataRepo(cluster string) (MetricDataRepository, error) {
    var err error
    repo, ok := metricDataRepos[cluster]

    if !ok {
        return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
        err = fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
    }

    if metrics == nil {
        for _, m := range archive.GetCluster(cluster).MetricConfig {
            metrics = append(metrics, m.Name)
        }
    }

    data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
    if err != nil {
        if len(data) != 0 {
            log.Warnf("partial error: %s", err.Error())
        } else {
            log.Error("Error while loading node data from metric repository")
            return nil, err
        }
    }

    if data == nil {
        return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
    }

    return data, nil
}

func cacheKey(
    job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
) string {
    // Duration and StartTime do not need to be in the cache key as StartTime is less unique than
    // job.ID and the TTL of the cache entry makes sure it does not stay there forever.
    return fmt.Sprintf("%d(%s):[%v],[%v]",
        job.ID, job.State, metrics, scopes)
}
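The resulting key is compact and stable for a given job state. For a running job with database ID 1337, metrics flops_any and mem_bw, and node scope, the format string above would yield something like the following (values are illustrative, and scopes are shown as plain strings for the sketch):

key := fmt.Sprintf("%d(%s):[%v],[%v]",
    1337, "running", []string{"flops_any", "mem_bw"}, []string{"node"})
fmt.Println(key) // 1337(running):[[flops_any mem_bw]],[[node]]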

// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/mean/max Graph can be
// used instead of a lot of single lines.
func prepareJobData(
    job *schema.Job,
    jobData schema.JobData,
    scopes []schema.MetricScope,
) {
    const maxSeriesSize int = 15
    for _, scopes := range jobData {
        for _, jm := range scopes {
            if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
                continue
            }

            jm.AddStatisticsSeries()
        }
    }

    nodeScopeRequested := false
    for _, scope := range scopes {
        if scope == schema.MetricScopeNode {
            nodeScopeRequested = true
        }
    }

    if nodeScopeRequested {
        jobData.AddNodeScope("flops_any")
        jobData.AddNodeScope("mem_bw")
    }
}

// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
    allMetrics := make([]string, 0)
    metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
    for _, mc := range metricConfigs {
        allMetrics = append(allMetrics, mc.Name)
    }

    // TODO: Talk about this! What resolutions to store data at...
    scopes := []schema.MetricScope{schema.MetricScopeNode}
    if job.NumNodes <= 8 {
        scopes = append(scopes, schema.MetricScopeCore)
    }

    jobData, err := LoadData(job, allMetrics, scopes, ctx)
    if err != nil {
        log.Error("Error while loading job data for archiving")
        return nil, err
    }

    jobMeta := &schema.JobMeta{
        BaseJob:    job.BaseJob,
        StartTime:  job.StartTime.Unix(),
        Statistics: make(map[string]schema.JobStatistics),
    }

    for metric, data := range jobData {
        avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
        nodeData, ok := data["node"]
        if !ok {
            // TODO/FIXME: Calc average for non-node metrics as well!
            continue
        }

        for _, series := range nodeData.Series {
            avg += series.Statistics.Avg
            min = math.Min(min, series.Statistics.Min)
            max = math.Max(max, series.Statistics.Max)
        }

        jobMeta.Statistics[metric] = schema.JobStatistics{
            Unit: schema.Unit{
                Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
                Base:   archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
            },
            Avg: avg / float64(job.NumNodes),
            Min: min,
            Max: max,
        }
    }

    // If the file based archive is disabled,
    // only return the JobMeta structure as the
    // statistics in there are needed.
    if !useArchive {
        return jobMeta, nil
    }

    return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
    return repo, err
}
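With this refactor, callers resolve the repository once via GetMetricDataRepo and invoke its interface methods directly. A hedged sketch from a caller's perspective; the cluster name and metric are placeholders, and job and ctx are assumed to be supplied by the caller:

repo, err := metricdata.GetMetricDataRepo("emmy") // "emmy" is a placeholder cluster name
if err != nil {
    return err
}
stats, err := repo.LoadStats(job, []string{"cpu_load"}, ctx)
if err != nil {
    return err
}
_ = stats // map of metrics to per-node statistics, per the interface above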

@@ -20,6 +20,7 @@ import (
    "text/template"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -166,10 +167,10 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
    var rt http.RoundTripper = nil
    if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
        prom_pw := promcfg.Secret(prom_pw)
        rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
        rt = promcfg.NewBasicAuthRoundTripper(promcfg.NewInlineSecret(config.Username), promcfg.NewInlineSecret(string(prom_pw)), promapi.DefaultRoundTripper)
    } else {
        if config.Username != "" {
            return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
            return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set")
        }
    }
    // init client
@@ -204,8 +205,8 @@ func (pdb *PrometheusDataRepository) FormatQuery(
    metric string,
    scope schema.MetricScope,
    nodes []string,
    cluster string) (string, error) {

    cluster string,
) (string, error) {
    args := PromQLArgs{}
    if len(nodes) > 0 {
        args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
@@ -233,12 +234,13 @@ func (pdb *PrometheusDataRepository) RowToSeries(
    from time.Time,
    step int64,
    steps int64,
    row *promm.SampleStream) schema.Series {
    row *promm.SampleStream,
) schema.Series {
    ts := from.Unix()
    hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
    // init array of expected length with NaN
    values := make([]schema.Float, steps+1)
    for i, _ := range values {
    for i := range values {
        values[i] = schema.NaN
    }
    // copy recorded values from prom sample pair
@@ -263,8 +265,9 @@ func (pdb *PrometheusDataRepository) LoadData(
    job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    ctx context.Context) (schema.JobData, error) {

    ctx context.Context,
    resolution int,
) (schema.JobData, error) {
    // TODO respect requested scope
    if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
        scopes = append(scopes, schema.MetricScopeNode)
@@ -306,7 +309,6 @@ func (pdb *PrometheusDataRepository) LoadData(
        Step:  time.Duration(metricConfig.Timestep * 1e9),
    }
    result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)

    if err != nil {
        log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
        return nil, errors.New("Prometheus query error")
@@ -335,7 +337,7 @@ func (pdb *PrometheusDataRepository) LoadData(
        pdb.RowToSeries(from, step, steps, row))
    }
    // only add metric if at least one host returned data
    if !ok && len(jobMetric.Series) > 0{
    if !ok && len(jobMetric.Series) > 0 {
        jobData[metric][scope] = jobMetric
    }
    // sort by hostname to get uniform coloring
@@ -351,12 +353,12 @@ func (pdb *PrometheusDataRepository) LoadData(
func (pdb *PrometheusDataRepository) LoadStats(
    job *schema.Job,
    metrics []string,
    ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {

    ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {
    // map of metrics of nodes of stats
    stats := map[string]map[string]schema.MetricStatistics{}

    data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
    data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
    if err != nil {
        log.Warn("Error while loading job for stats")
        return nil, err
@@ -376,7 +378,8 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
    metrics, nodes []string,
    scopes []schema.MetricScope,
    from, to time.Time,
    ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
    ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
    t0 := time.Now()
    // Map of hosts of metrics of value slices
    data := make(map[string]map[string][]*schema.JobMetric)
@@ -411,7 +414,6 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
        Step:  time.Duration(metricConfig.Timestep * 1e9),
    }
    result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)

    if err != nil {
        log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
        return nil, errors.New("Prometheus query error")
@@ -445,3 +447,188 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
    log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
    return data, nil
}

// Implemented by NHR@FAU; Used in Job-View StatsTable
func (pdb *PrometheusDataRepository) LoadScopedStats(
    job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    ctx context.Context) (schema.ScopedJobStats, error) {

    // Assumption: pdb.loadData() only returns series node-scope - use node scope for statsTable
    scopedJobStats := make(schema.ScopedJobStats)
    data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
    if err != nil {
        log.Warn("Error while loading job for scopedJobStats")
        return nil, err
    }

    for metric, metricData := range data {
        for _, scope := range scopes {
            if scope != schema.MetricScopeNode {
                logOnce.Do(func() {
                    log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
                })
                continue
            }

            if _, ok := scopedJobStats[metric]; !ok {
                scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
            }

            if _, ok := scopedJobStats[metric][scope]; !ok {
                scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
            }

            for _, series := range metricData[scope].Series {
                scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
                    Hostname: series.Hostname,
                    Data:     &series.Statistics,
                })
            }
        }
    }

    return scopedJobStats, nil
}

// Implemented by NHR@FAU; Used in NodeList-View
func (pdb *PrometheusDataRepository) LoadNodeListData(
    cluster, subCluster, nodeFilter string,
    metrics []string,
    scopes []schema.MetricScope,
    resolution int,
    from, to time.Time,
    page *model.PageRequest,
    ctx context.Context,
) (map[string]schema.JobData, int, bool, error) {

    // Assumption: pdb.loadData() only returns series node-scope - use node scope for NodeList

    // 0) Init additional vars
    var totalNodes int = 0
    var hasNextPage bool = false

    // 1) Get list of all nodes
    var nodes []string
    if subCluster != "" {
        scNodes := archive.NodeLists[cluster][subCluster]
        nodes = scNodes.PrintList()
    } else {
        subClusterNodeLists := archive.NodeLists[cluster]
        for _, nodeList := range subClusterNodeLists {
            nodes = append(nodes, nodeList.PrintList()...)
        }
    }

    // 2) Filter nodes
    if nodeFilter != "" {
        filteredNodes := []string{}
        for _, node := range nodes {
            if strings.Contains(node, nodeFilter) {
                filteredNodes = append(filteredNodes, node)
            }
        }
        nodes = filteredNodes
    }

    // 2.1) Count total nodes && Sort nodes -> Sorting invalidated after return ...
    totalNodes = len(nodes)
    sort.Strings(nodes)

    // 3) Apply paging
    if len(nodes) > page.ItemsPerPage {
        start := (page.Page - 1) * page.ItemsPerPage
        end := start + page.ItemsPerPage
        if end > len(nodes) {
            end = len(nodes)
            hasNextPage = false
        } else {
            hasNextPage = true
        }
        nodes = nodes[start:end]
    }

    // 4) Fetch Data, based on pdb.LoadNodeData()

    t0 := time.Now()
    // Map of hosts of jobData
    data := make(map[string]schema.JobData)

    // query db for each metric
    // TODO: scopes seems to be always empty
    if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
        scopes = append(scopes, schema.MetricScopeNode)
    }

    for _, scope := range scopes {
        if scope != schema.MetricScopeNode {
            logOnce.Do(func() {
                log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
            })
            continue
        }

        for _, metric := range metrics {
            metricConfig := archive.GetMetricConfig(cluster, metric)
            if metricConfig == nil {
                log.Warnf("Error in LoadNodeListData: Metric %s for cluster %s not configured", metric, cluster)
                return nil, totalNodes, hasNextPage, errors.New("Prometheus config error")
            }
            query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
            if err != nil {
                log.Warn("Error while formatting prometheus query")
                return nil, totalNodes, hasNextPage, err
            }

            // ranged query over all nodes
            r := promv1.Range{
                Start: from,
                End:   to,
                Step:  time.Duration(metricConfig.Timestep * 1e9),
            }
            result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
            if err != nil {
                log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
                return nil, totalNodes, hasNextPage, errors.New("Prometheus query error")
            }
            if len(warnings) > 0 {
                log.Warnf("Warnings: %v\n", warnings)
            }

            step := int64(metricConfig.Timestep)
            steps := int64(to.Sub(from).Seconds()) / step

            // iter rows of host, metric, values
            for _, row := range result.(promm.Matrix) {
                hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)

                hostdata, ok := data[hostname]
                if !ok {
                    hostdata = make(schema.JobData)
                    data[hostname] = hostdata
                }

                metricdata, ok := hostdata[metric]
                if !ok {
                    metricdata = make(map[schema.MetricScope]*schema.JobMetric)
                    data[hostname][metric] = metricdata
                }

                // output per host, metric and scope
                scopeData, ok := metricdata[scope]
                if !ok {
                    scopeData = &schema.JobMetric{
                        Unit:     metricConfig.Unit,
                        Timestep: metricConfig.Timestep,
                        Series:   []schema.Series{pdb.RowToSeries(from, step, steps, row)},
                    }
                    data[hostname][metric][scope] = scopeData
                }
            }
        }
    }
    t1 := time.Since(t0)
    log.Debugf("LoadNodeListData of %v nodes took %s", len(data), t1)
    return data, totalNodes, hasNextPage, nil
}
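The step/steps arithmetic above determines how many samples RowToSeries allocates. A self-contained check with a 60 s timestep over a 10-minute window (values are made up):

package main

import (
    "fmt"
    "time"
)

func main() {
    from := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
    to := from.Add(10 * time.Minute)
    step := int64(60)
    steps := int64(to.Sub(from).Seconds()) / step
    // RowToSeries allocates steps+1 values and pre-fills them with NaN.
    fmt.Println(steps, steps+1) // 10 11
}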

@@ -9,10 +9,11 @@ import (
    "encoding/json"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
)

var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
    panic("TODO")
}

@@ -27,14 +28,25 @@ func (tmdr *TestMetricDataRepository) LoadData(
    job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    ctx context.Context) (schema.JobData, error) {
    ctx context.Context,
    resolution int) (schema.JobData, error) {

    return TestLoadDataCallback(job, metrics, scopes, ctx)
    return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
}

func (tmdr *TestMetricDataRepository) LoadStats(
    job *schema.Job,
    metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
    metrics []string,
    ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {

    panic("TODO")
}

func (tmdr *TestMetricDataRepository) LoadScopedStats(
    job *schema.Job,
    metrics []string,
    scopes []schema.MetricScope,
    ctx context.Context) (schema.ScopedJobStats, error) {

    panic("TODO")
}
@@ -48,3 +60,62 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(

    panic("TODO")
}

func (tmdr *TestMetricDataRepository) LoadNodeListData(
    cluster, subCluster, nodeFilter string,
    metrics []string,
    scopes []schema.MetricScope,
    resolution int,
    from, to time.Time,
    page *model.PageRequest,
    ctx context.Context,
) (map[string]schema.JobData, int, bool, error) {

    panic("TODO")
}

func DeepCopy(jd_temp schema.JobData) schema.JobData {
    var jd schema.JobData

    jd = make(schema.JobData, len(jd_temp))
    for k, v := range jd_temp {
        jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
        for k_, v_ := range v {
            jd[k][k_] = new(schema.JobMetric)
            jd[k][k_].Series = make([]schema.Series, len(v_.Series))
            for i := 0; i < len(v_.Series); i += 1 {
                jd[k][k_].Series[i].Data = make([]schema.Float, len(v_.Series[i].Data))
                copy(jd[k][k_].Series[i].Data, v_.Series[i].Data)
                jd[k][k_].Series[i].Hostname = v_.Series[i].Hostname
                jd[k][k_].Series[i].Id = v_.Series[i].Id
                jd[k][k_].Series[i].Statistics.Avg = v_.Series[i].Statistics.Avg
                jd[k][k_].Series[i].Statistics.Min = v_.Series[i].Statistics.Min
                jd[k][k_].Series[i].Statistics.Max = v_.Series[i].Statistics.Max
            }
            jd[k][k_].Timestep = v_.Timestep
            jd[k][k_].Unit.Base = v_.Unit.Base
            jd[k][k_].Unit.Prefix = v_.Unit.Prefix
            if v_.StatisticsSeries != nil {
                // Init Slices
                jd[k][k_].StatisticsSeries = new(schema.StatsSeries)
                jd[k][k_].StatisticsSeries.Max = make([]schema.Float, len(v_.StatisticsSeries.Max))
                jd[k][k_].StatisticsSeries.Min = make([]schema.Float, len(v_.StatisticsSeries.Min))
                jd[k][k_].StatisticsSeries.Median = make([]schema.Float, len(v_.StatisticsSeries.Median))
                jd[k][k_].StatisticsSeries.Mean = make([]schema.Float, len(v_.StatisticsSeries.Mean))
                // Copy Data
                copy(jd[k][k_].StatisticsSeries.Max, v_.StatisticsSeries.Max)
                copy(jd[k][k_].StatisticsSeries.Min, v_.StatisticsSeries.Min)
                copy(jd[k][k_].StatisticsSeries.Median, v_.StatisticsSeries.Median)
                copy(jd[k][k_].StatisticsSeries.Mean, v_.StatisticsSeries.Mean)
                // Handle Percentiles
                // NOTE: allocate the Percentiles map before writing into it (new(StatsSeries)
                // leaves it nil); map[int][]schema.Float is assumed as the underlying type
                jd[k][k_].StatisticsSeries.Percentiles = make(map[int][]schema.Float, len(v_.StatisticsSeries.Percentiles))
                for k__, v__ := range v_.StatisticsSeries.Percentiles {
                    jd[k][k_].StatisticsSeries.Percentiles[k__] = make([]schema.Float, len(v__))
                    copy(jd[k][k_].StatisticsSeries.Percentiles[k__], v__)
                }
            } else {
                jd[k][k_].StatisticsSeries = v_.StatisticsSeries
            }
        }
    }
    return jd
}
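A short sketch of the property this helper exists for in tests: mutating the copy must not leak into the source. The composite-literal shapes follow the schema types used above; the metric name and values are made up:

orig := schema.JobData{
    "flops_any": {
        schema.MetricScopeNode: &schema.JobMetric{
            Timestep: 60,
            Series: []schema.Series{
                {Hostname: "n01", Data: []schema.Float{1, 2, 3}},
            },
        },
    },
}
cp := DeepCopy(orig)
cp["flops_any"][schema.MetricScopeNode].Series[0].Data[0] = 99
// orig["flops_any"][schema.MetricScopeNode].Series[0].Data[0] is still 1.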

@@ -59,17 +59,15 @@ func Connect(driver string, db string) {
        } else {
            dbHandle, err = sqlx.Open("sqlite3", opts.URL)
        }
        if err != nil {
            log.Fatal(err)
        }
    case "mysql":
        opts.URL += "?multiStatements=true"
        dbHandle, err = sqlx.Open("mysql", opts.URL)
        if err != nil {
            log.Fatalf("sqlx.Open() error: %v", err)
        }
    default:
        log.Fatalf("unsupported database driver: %s", driver)
        log.Abortf("DB Connection: Unsupported database driver '%s'.\n", driver)
    }

    if err != nil {
        log.Abortf("DB Connection: Could not connect to '%s' database with sqlx.Open().\nError: %s\n", driver, err.Error())
    }

    dbHandle.SetMaxOpenConns(opts.MaxOpenConnections)
@@ -80,7 +78,7 @@ func Connect(driver string, db string) {
    dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
    err = checkDBVersion(driver, dbHandle.DB)
    if err != nil {
        log.Fatal(err)
        log.Abortf("DB Connection: Failed DB version check.\nError: %s\n", err.Error())
    }
    })
}
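Callers initialize the connection singleton once at startup and fetch repositories afterwards. A hedged sketch, assuming Connect lives in the repository package shown below and using a placeholder sqlite path:

repository.Connect("sqlite3", "./var/job.db") // aborts via log.Abortf on failure
jobRepo := repository.GetJobRepository()
_ = jobRepo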

@@ -5,17 +5,16 @@
package repository

import (
    "context"
    "database/sql"
    "encoding/json"
    "errors"
    "fmt"
    "math"
    "strconv"
    "sync"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
    "github.com/ClusterCockpit/cc-backend/internal/metricdata"
    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
@@ -30,12 +29,10 @@ var (
)

type JobRepository struct {
    DB             *sqlx.DB
    stmtCache      *sq.StmtCache
    cache          *lrucache.Cache
    archiveChannel chan *schema.Job
    driver         string
    archivePending sync.WaitGroup
    DB        *sqlx.DB
    stmtCache *sq.StmtCache
    cache     *lrucache.Cache
    driver    string
}

func GetJobRepository() *JobRepository {
@@ -46,47 +43,48 @@ func GetJobRepository() *JobRepository {
        DB:     db.DB,
        driver: db.Driver,

        stmtCache:      sq.NewStmtCache(db.DB),
        cache:          lrucache.New(1024 * 1024),
        archiveChannel: make(chan *schema.Job, 128),
        stmtCache: sq.NewStmtCache(db.DB),
        cache:     lrucache.New(1024 * 1024),
    }
    // start archiving worker
    go jobRepoInstance.archivingWorker()
    })
    return jobRepoInstance
}

var jobColumns []string = []string{
    "job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id",
    "job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id",
    "job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
    "job.duration", "job.walltime", "job.resources", "job.mem_used_max", "job.flops_any_avg", "job.mem_bw_avg", "job.load_avg", // "job.meta_data",
    "job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
}

func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
    job := &schema.Job{}

    if err := row.Scan(
        &job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
        &job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
        &job.Duration, &job.Walltime, &job.RawResources, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.LoadAvg /*&job.RawMetaData*/); err != nil {
        &job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
        log.Warnf("Error while scanning rows (Job): %v", err)
        return nil, err
    }

    if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
        log.Warn("Error while unmarhsaling raw resources json")
        log.Warn("Error while unmarshaling raw resources json")
        return nil, err
    }
    job.RawResources = nil

    // if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
    //  return nil, err
    // }
    if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
        log.Warnf("Error while unmarshaling raw footprint json: %v", err)
        return nil, err
    }
    job.RawFootprint = nil

    job.StartTime = time.Unix(job.StartTimeUnix, 0)
    if job.Duration == 0 && job.State == schema.JobStateRunning {
    // Always ensure accurate duration for running jobs
    if job.State == schema.JobStateRunning {
        job.Duration = int32(time.Since(job.StartTime).Seconds())
    }

    job.RawResources = nil
    return job, nil
}

@@ -205,7 +203,10 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
        return err
    }

    if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
    if _, err = sq.Update("job").
        Set("meta_data", job.RawMetaData).
        Where("job.id = ?", job.ID).
        RunWith(r.stmtCache).Exec(); err != nil {
        log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
        return err
    }
@@ -214,222 +215,54 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
    return archive.UpdateMetadata(job, job.MetaData)
}

// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) Find(
    jobId *int64,
    cluster *string,
    startTime *int64,
) (*schema.Job, error) {
func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
    start := time.Now()
    q := sq.Select(jobColumns...).From("job").
        Where("job.job_id = ?", *jobId)

    if cluster != nil {
        q = q.Where("job.cluster = ?", *cluster)
    }
    if startTime != nil {
        q = q.Where("job.start_time = ?", *startTime)
    }

    log.Debugf("Timer Find %s", time.Since(start))
    return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindAll(
    jobId *int64,
    cluster *string,
    startTime *int64,
) ([]*schema.Job, error) {
    start := time.Now()
    q := sq.Select(jobColumns...).From("job").
        Where("job.job_id = ?", *jobId)

    if cluster != nil {
        q = q.Where("job.cluster = ?", *cluster)
    }
    if startTime != nil {
        q = q.Where("job.start_time = ?", *startTime)
    }

    rows, err := q.RunWith(r.stmtCache).Query()
    if err != nil {
        log.Error("Error while running query")
    if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
        RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
        log.Warn("Error while scanning for job footprint")
        return nil, err
    }

    jobs := make([]*schema.Job, 0, 10)
    for rows.Next() {
        job, err := scanJob(rows)
        if err != nil {
            log.Warn("Error while scanning rows")
            return nil, err
        }
        jobs = append(jobs, job)
    }
    log.Debugf("Timer FindAll %s", time.Since(start))
    return jobs, nil
}

// FindById executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
    q := sq.Select(jobColumns...).
        From("job").Where("job.id = ?", jobId)
    return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

func (r *JobRepository) FindConcurrentJobs(
    ctx context.Context,
    job *schema.Job,
) (*model.JobLinkResultList, error) {
    if job == nil {
    if len(job.RawFootprint) == 0 {
        return nil, nil
    }

    query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
    if qerr != nil {
        return nil, qerr
    }

    query = query.Where("cluster = ?", job.Cluster)
    var startTime int64
    var stopTime int64

    startTime = job.StartTimeUnix
    hostname := job.Resources[0].Hostname

    if job.State == schema.JobStateRunning {
        stopTime = time.Now().Unix()
    } else {
        stopTime = startTime + int64(job.Duration)
    }

    // Add 200s overlap for jobs start time at the end
    startTimeTail := startTime + 10
    stopTimeTail := stopTime - 200
    startTimeFront := startTime + 200

    queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
        "running", startTimeTail, stopTimeTail, startTime)
    queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))

    query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
        "running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
    query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))

    rows, err := query.RunWith(r.stmtCache).Query()
    if err != nil {
        log.Errorf("Error while running query: %v", err)
    if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
        log.Warn("Error while unmarshaling raw footprint json")
        return nil, err
    }

    items := make([]*model.JobLink, 0, 10)
    queryString := fmt.Sprintf("cluster=%s", job.Cluster)
    log.Debugf("Timer FetchFootprint %s", time.Since(start))
    return job.Footprint, nil
}

    for rows.Next() {
        var id, jobId, startTime sql.NullInt64

        if err = rows.Scan(&id, &jobId, &startTime); err != nil {
            log.Warn("Error while scanning rows")
            return nil, err
        }

        if id.Valid {
            queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
            items = append(items,
                &model.JobLink{
                    ID:    fmt.Sprint(id.Int64),
                    JobID: int(jobId.Int64),
                })
        }
func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) {
    start := time.Now()
    cachekey := fmt.Sprintf("energyFootprint:%d", job.ID)
    if cached := r.cache.Get(cachekey, nil); cached != nil {
        job.EnergyFootprint = cached.(map[string]float64)
        return job.EnergyFootprint, nil
    }

    rows, err = queryRunning.RunWith(r.stmtCache).Query()
    if err != nil {
        log.Errorf("Error while running query: %v", err)
    if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID).
        RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil {
        log.Warn("Error while scanning for job energy_footprint")
        return nil, err
    }

    for rows.Next() {
        var id, jobId, startTime sql.NullInt64

        if err := rows.Scan(&id, &jobId, &startTime); err != nil {
            log.Warn("Error while scanning rows")
            return nil, err
        }

        if id.Valid {
            queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
            items = append(items,
                &model.JobLink{
                    ID:    fmt.Sprint(id.Int64),
                    JobID: int(jobId.Int64),
                })
        }
    if len(job.RawEnergyFootprint) == 0 {
        return nil, nil
    }

    cnt := len(items)

    return &model.JobLinkResultList{
        ListQuery: &queryString,
        Items:     items,
        Count:     &cnt,
    }, nil
}

// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transferred!
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
    job.RawResources, err = json.Marshal(job.Resources)
    if err != nil {
        return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
    if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil {
        log.Warn("Error while unmarshaling raw energy footprint json")
        return nil, err
    }

    job.RawMetaData, err = json.Marshal(job.MetaData)
    if err != nil {
        return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
    }

    res, err := r.DB.NamedExec(`INSERT INTO job (
        job_id, user, project, cluster, subcluster, `+"`partition`"+`, array_job_id, num_nodes, num_hwthreads, num_acc,
        exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data
    ) VALUES (
        :job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
        :exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data
    );`, job)
    if err != nil {
        return -1, err
    }

    return res.LastInsertId()
}

// Stop updates the job with the database id jobId using the provided arguments.
func (r *JobRepository) Stop(
    jobId int64,
    duration int32,
    state schema.JobState,
    monitoringStatus int32,
) (err error) {
    stmt := sq.Update("job").
        Set("job_state", state).
        Set("duration", duration).
        Set("monitoring_status", monitoringStatus).
        Where("job.id = ?", jobId)

    _, err = stmt.RunWith(r.stmtCache).Exec()
    return
    r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour)
    log.Debugf("Timer FetchEnergyFootprint %s", time.Since(start))
    return job.EnergyFootprint, nil
}
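The cache discipline above (a lookup with a nil compute function, an explicit Put with size and TTL) follows the pkg/lrucache API as used throughout this file. A minimal sketch under that assumption, with a fabricated job ID and footprint:

cache := lrucache.New(1024 * 1024)
key := fmt.Sprintf("energyFootprint:%d", 42) // 42 is a placeholder job ID

if cached := cache.Get(key, nil); cached != nil {
    footprint := cached.(map[string]float64) // cache hit: skip the DB round trip
    _ = footprint
} else {
    footprint := map[string]float64{"cpu_power": 123.4} // normally unmarshaled from the DB column
    cache.Put(key, footprint, len(footprint), 24*time.Hour)
}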
|
||||
|
||||
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
@ -461,119 +294,22 @@ func (r *JobRepository) DeleteJobById(id int64) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
|
||||
stmt := sq.Update("job").
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", job)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return
|
||||
}
|
||||
|
||||
// Stop updates the job with the database id jobId using the provided arguments.
|
||||
func (r *JobRepository) MarkArchived(
|
||||
jobId int64,
|
||||
monitoringStatus int32,
|
||||
metricStats map[string]schema.JobStatistics,
|
||||
) error {
|
||||
stmt := sq.Update("job").
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
|
||||
for metric, stats := range metricStats {
|
||||
switch metric {
|
||||
case "flops_any":
|
||||
stmt = stmt.Set("flops_any_avg", stats.Avg)
|
||||
case "mem_used":
|
||||
stmt = stmt.Set("mem_used_max", stats.Max)
|
||||
case "mem_bw":
|
||||
stmt = stmt.Set("mem_bw_avg", stats.Avg)
|
||||
case "load":
|
||||
stmt = stmt.Set("load_avg", stats.Avg)
|
||||
case "cpu_load":
|
||||
stmt = stmt.Set("load_avg", stats.Avg)
|
||||
case "net_bw":
|
||||
stmt = stmt.Set("net_bw_avg", stats.Avg)
|
||||
case "file_bw":
|
||||
stmt = stmt.Set("file_bw_avg", stats.Avg)
|
||||
default:
|
||||
log.Debugf("MarkArchived() Metric '%v' unknown", metric)
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
log.Warn("Error while marking job as archived")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Archiving worker thread
|
||||
func (r *JobRepository) archivingWorker() {
|
||||
for {
|
||||
select {
|
||||
case job, ok := <-r.archiveChannel:
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
start := time.Now()
|
||||
// not using meta data, called to load JobMeta into Cache?
|
||||
// will fail if job meta not in repository
|
||||
if _, err := r.FetchMetadata(job); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
|
||||
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
// metricdata.ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
||||
// TODO: Maybe use context with cancel/timeout here
|
||||
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
|
||||
if err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
|
||||
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
// Update the jobs database entry one last time:
|
||||
if err := r.MarkArchived(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
||||
r.archivePending.Done()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Trigger async archiving
|
||||
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
|
||||
r.archivePending.Add(1)
|
||||
r.archiveChannel <- job
|
||||
}
|
||||
|
||||
// Wait for background thread to finish pending archiving operations
|
||||
func (r *JobRepository) WaitForArchiving() {
|
||||
// close channel and wait for worker to process remaining jobs
|
||||
r.archivePending.Wait()
|
||||
}
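
The TriggerArchiving/WaitForArchiving pair is the usual WaitGroup-plus-channel handoff: each trigger increments the counter before sending, the worker decrements it once a job is fully processed, so Wait() blocks until all pending archives are done. A minimal, self-contained sketch of the same pattern (generic names, not the repository's actual types):

	package main

	import (
		"fmt"
		"sync"
	)

	type archiver struct {
		pending sync.WaitGroup
		ch      chan int
	}

	func (a *archiver) worker() {
		// range-over-channel stands in for the select loop above
		for id := range a.ch {
			fmt.Println("archiving", id) // placeholder for fetch + archive + mark-archived
			a.pending.Done()
		}
	}

	func (a *archiver) trigger(id int) {
		a.pending.Add(1) // count before send, as TriggerArchiving does
		a.ch <- id
	}

	func main() {
		a := &archiver{ch: make(chan int, 8)}
		go a.worker()
		a.trigger(1)
		a.trigger(2)
		a.pending.Wait() // mirrors WaitForArchiving
	}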

func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
	if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
		return searchterm, "", "", ""
	} else { // Has to have letters and logged-in user for other guesses
		if user != nil {
			// Find username in jobs (match)
			uresult, _ := r.FindColumnValue(user, searchterm, "job", "user", "user", false)
			// Find username by username in job table (match)
			uresult, _ := r.FindColumnValue(user, searchterm, "job", "hpc_user", "hpc_user", false)
			if uresult != "" {
				return "", uresult, "", ""
			}
			// Find username by name (like)
			nresult, _ := r.FindColumnValue(user, searchterm, "user", "username", "name", true)
			// Find username by real name in hpc_user table (like)
			nresult, _ := r.FindColumnValue(user, searchterm, "hpc_user", "username", "name", true)
			if nresult != "" {
				return "", nresult, "", ""
			}
			// Find projectId in jobs (match)
			// Find projectId by projectId in job table (match)
			presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false)
			if presult != "" {
				return "", "", presult, ""
@@ -655,7 +391,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
	start := time.Now()
	partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
		parts := []string{}
		if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
		if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
			return nil, 0, 1000
		}

@@ -712,6 +448,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
	return subclusters, nil
}

// FIXME: Set duration to requested walltime?
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
	start := time.Now()
	res, err := sq.Update("job").
@@ -740,6 +477,46 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
	return nil
}

func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
	query := sq.Select(jobColumns...).From("job").
		Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
		Where("job.job_state = 'running'").
		Where("job.duration > 600")

	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Error("Error while running query")
		return nil, err
	}

	jobs := make([]*schema.Job, 0, 50)
	for rows.Next() {
		job, err := scanJob(rows)
		if err != nil {
			rows.Close()
			log.Warn("Error while scanning rows")
			return nil, err
		}
		jobs = append(jobs, job)
	}

	log.Infof("Return job count %d", len(jobs))
	return jobs, nil
}

func (r *JobRepository) UpdateDuration() error {
	stmnt := sq.Update("job").
		Set("duration", sq.Expr("? - job.start_time", time.Now().Unix())).
		Where("job_state = 'running'")

	_, err := stmnt.RunWith(r.stmtCache).Exec()
	if err != nil {
		return err
	}

	return nil
}

func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
	var query sq.SelectBuilder

@@ -778,27 +555,118 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
	return jobs, nil
}

const NamedJobInsert string = `INSERT INTO job (
	job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
	exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
	mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
	:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
	:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
	:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`

func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
	res, err := r.DB.NamedExec(NamedJobInsert, job)
	if err != nil {
		log.Warn("Error while NamedJobInsert")
		return 0, err
	}
	id, err := res.LastInsertId()
	if err != nil {
		log.Warn("Error while getting last insert ID")
		return 0, err
	}

	return id, nil
}

func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
	stmt := sq.Update("job").
		Set("monitoring_status", monitoringStatus).
		Where("job.id = ?", job)

	_, err = stmt.RunWith(r.stmtCache).Exec()
	return
}

func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
	if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
		return err
	}

	return nil
}

func (r *JobRepository) MarkArchived(
	stmt sq.UpdateBuilder,
	monitoringStatus int32,
) sq.UpdateBuilder {
	return stmt.Set("monitoring_status", monitoringStatus)
}

func (r *JobRepository) UpdateEnergy(
	stmt sq.UpdateBuilder,
	jobMeta *schema.JobMeta,
) (sq.UpdateBuilder, error) {
	/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
	sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
	if err != nil {
		log.Errorf("cannot get subcluster: %s", err.Error())
		return stmt, err
	}
	energyFootprint := make(map[string]float64)

	// Total Job Energy Outside Loop
	totalEnergy := 0.0
	for _, fp := range sc.EnergyFootprint {
		// Always Init Metric Energy Inside Loop
		metricEnergy := 0.0
		if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
			// Note: For DB data, calculate and save as kWh
			if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
				log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, fp, jobMeta.Cluster)
				// FIXME: Needs sum as stats type
			} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
				// Energy: Power (in Watts) * Time (in Seconds)
				// Unit: (W * (s / 3600)) / 1000 = kWh
				// Round 2 Digits: round(Energy * 100) / 100
				// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
				// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
				rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
				metricEnergy = math.Round(rawEnergy*100.0) / 100.0
			}
		} else {
			log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
		}

		energyFootprint[fp] = metricEnergy
		totalEnergy += metricEnergy

		// log.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), energy, jobMeta.JobID, totalEnergy)
	}

	var rawFootprint []byte
	if rawFootprint, err = json.Marshal(energyFootprint); err != nil {
		log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
		return stmt, err
	}

	return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil
}
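
The power branch is plain unit arithmetic: average per-node power in watts times the node count, times the duration converted from seconds to hours, divided by 1000 to turn Wh into kWh, then rounded to two digits. A self-contained check with made-up numbers:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		avgPower := 250.0  // hypothetical all-node average in W
		numNodes := 4.0
		duration := 7200.0 // s, i.e. 2 h

		// (W * nodes) * (s / 3600) / 1000 = kWh
		raw := (avgPower * numNodes) * (duration / 3600.0) / 1000.0
		kwh := math.Round(raw*100.0) / 100.0
		fmt.Printf("%.2f kWh\n", kwh) // 2.00 kWh: 1000 W sustained for 2 h
	}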

func (r *JobRepository) UpdateFootprint(
	stmt sq.UpdateBuilder,
	jobMeta *schema.JobMeta,
) (sq.UpdateBuilder, error) {
	/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
	sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
	if err != nil {
		log.Errorf("cannot get subcluster: %s", err.Error())
		return stmt, err
	}
	footprint := make(map[string]float64)

	for _, fp := range sc.Footprint {
		var statType string
		for _, gm := range archive.GlobalMetricList {
			if gm.Name == fp {
				statType = gm.Footprint
			}
		}

		if statType != "avg" && statType != "min" && statType != "max" {
			log.Warnf("unknown statType for footprint update: %s", statType)
			return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
		}

		if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
			statType = sc.MetricConfig[i].Footprint
		}

		name := fmt.Sprintf("%s_%s", fp, statType)
		footprint[name] = LoadJobStat(jobMeta, fp, statType)
	}

	var rawFootprint []byte
	if rawFootprint, err = json.Marshal(footprint); err != nil {
		log.Warnf("Error while marshaling footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
		return stmt, err
	}

	return stmt.Set("footprint", string(rawFootprint)), nil
}

internal/repository/jobCreate.go (Normal file, 75 lines)
@@ -0,0 +1,75 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository

import (
	"encoding/json"
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	sq "github.com/Masterminds/squirrel"
)

const NamedJobInsert string = `INSERT INTO job (
	job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
	exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES (
	:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
	:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);`

func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
	res, err := r.DB.NamedExec(NamedJobInsert, job)
	if err != nil {
		log.Warn("Error while NamedJobInsert")
		return 0, err
	}
	id, err := res.LastInsertId()
	if err != nil {
		log.Warn("Error while getting last insert ID")
		return 0, err
	}

	return id, nil
}

// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transferred!
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
	job.RawFootprint, err = json.Marshal(job.Footprint)
	if err != nil {
		return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
	}

	job.RawResources, err = json.Marshal(job.Resources)
	if err != nil {
		return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
	}

	job.RawMetaData, err = json.Marshal(job.MetaData)
	if err != nil {
		return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
	}

	return r.InsertJob(job)
}

// Stop updates the job with the database id jobId using the provided arguments.
func (r *JobRepository) Stop(
	jobId int64,
	duration int32,
	state schema.JobState,
	monitoringStatus int32,
) (err error) {
	stmt := sq.Update("job").
		Set("job_state", state).
		Set("duration", duration).
		Set("monitoring_status", monitoringStatus).
		Where("job.id = ?", jobId)

	_, err = stmt.RunWith(r.stmtCache).Exec()
	return
}
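
Start and Stop bracket the job lifecycle: Start marshals the raw JSON columns and delegates to InsertJob, Stop later finalizes state, duration, and monitoring status. A hedged sketch of a call site (repo, jobMeta, and stopTime are hypothetical; the two schema constants are assumed to exist under these names):

	// Hypothetical caller; jobMeta is a populated *schema.JobMeta.
	id, err := repo.Start(jobMeta)
	if err != nil {
		return fmt.Errorf("starting job failed: %w", err)
	}
	// ... job runs; eventually the stop request arrives ...
	duration := int32(stopTime - jobMeta.StartTime)
	if err := repo.Stop(id, duration, schema.JobStateCompleted, schema.MonitoringStatusRunningOrArchiving); err != nil {
		return fmt.Errorf("stopping job failed: %w", err)
	}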

internal/repository/jobFind.go (Normal file, 263 lines)
@@ -0,0 +1,263 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository

import (
	"context"
	"database/sql"
	"fmt"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	sq "github.com/Masterminds/squirrel"
)

// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) Find(
	jobId *int64,
	cluster *string,
	startTime *int64,
) (*schema.Job, error) {
	start := time.Now()
	q := sq.Select(jobColumns...).From("job").
		Where("job.job_id = ?", *jobId)

	if cluster != nil {
		q = q.Where("job.cluster = ?", *cluster)
	}
	if startTime != nil {
		q = q.Where("job.start_time = ?", *startTime)
	}

	q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match

	log.Debugf("Timer Find %s", time.Since(start))
	return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
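
As the comment notes, a missing job surfaces as sql.ErrNoRows, so callers can separate "not found" from a genuine database failure. A hedged sketch (variable names hypothetical):

	// Hypothetical caller of Find.
	job, err := r.Find(&jobId, &cluster, &startTime)
	if errors.Is(err, sql.ErrNoRows) {
		// no matching job: report "not found", not an internal error
	} else if err != nil {
		return err // real database failure
	}
	_ = job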

// FindAll executes a SQL query to find all matching batch jobs.
// The jobs are queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a slice of pointers to schema.Job data structures and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindAll(
	jobId *int64,
	cluster *string,
	startTime *int64,
) ([]*schema.Job, error) {
	start := time.Now()
	q := sq.Select(jobColumns...).From("job").
		Where("job.job_id = ?", *jobId)

	if cluster != nil {
		q = q.Where("job.cluster = ?", *cluster)
	}
	if startTime != nil {
		q = q.Where("job.start_time = ?", *startTime)
	}

	rows, err := q.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Error("Error while running query")
		return nil, err
	}

	jobs := make([]*schema.Job, 0, 10)
	for rows.Next() {
		job, err := scanJob(rows)
		if err != nil {
			log.Warn("Error while scanning rows")
			return nil, err
		}
		jobs = append(jobs, job)
	}
	log.Debugf("Timer FindAll %s", time.Since(start))
	return jobs, nil
}

// FindById executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
	q := sq.Select(jobColumns...).
		From("job").Where("job.id = ?", jobId)

	q, qerr := SecurityCheck(ctx, q)
	if qerr != nil {
		return nil, qerr
	}

	return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

// FindByIdWithUser executes a SQL query to find a specific batch job.
// The job is queried using the database id. The user is passed directly,
// instead as part of the context.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
	q := sq.Select(jobColumns...).
		From("job").Where("job.id = ?", jobId)

	q, qerr := SecurityCheckWithUser(user, q)
	if qerr != nil {
		return nil, qerr
	}

	return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

// FindByIdDirect executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
	q := sq.Select(jobColumns...).
		From("job").Where("job.id = ?", jobId)
	return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

// FindByJobId executes a SQL query to find a specific batch job.
// The job is queried using the slurm id and the clustername.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
	q := sq.Select(jobColumns...).
		From("job").
		Where("job.job_id = ?", jobId).
		Where("job.cluster = ?", cluster).
		Where("job.start_time = ?", startTime)

	q, qerr := SecurityCheck(ctx, q)
	if qerr != nil {
		return nil, qerr
	}

	return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

// IsJobOwner executes a SQL query to find a specific batch job.
// The job is queried using the slurm id, a username and the cluster.
// It returns a bool.
// If job was found, user is owner: test err != sql.ErrNoRows
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
	q := sq.Select("id").
		From("job").
		Where("job.job_id = ?", jobId).
		Where("job.hpc_user = ?", user).
		Where("job.cluster = ?", cluster).
		Where("job.start_time = ?", startTime)

	_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
	return err != sql.ErrNoRows
}

func (r *JobRepository) FindConcurrentJobs(
	ctx context.Context,
	job *schema.Job,
) (*model.JobLinkResultList, error) {
	if job == nil {
		return nil, nil
	}

	query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
	if qerr != nil {
		return nil, qerr
	}

	query = query.Where("cluster = ?", job.Cluster)
	var startTime int64
	var stopTime int64

	startTime = job.StartTimeUnix
	hostname := job.Resources[0].Hostname

	if job.State == schema.JobStateRunning {
		stopTime = time.Now().Unix()
	} else {
		stopTime = startTime + int64(job.Duration)
	}

	// Add 200s overlap for jobs start time at the end
	startTimeTail := startTime + 10
	stopTimeTail := stopTime - 200
	startTimeFront := startTime + 200

	queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
		"running", startTimeTail, stopTimeTail, startTime)
	// Get At Least One Exact Hostname Match from JSON Resources Array in Database
	queryRunning = queryRunning.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)

	query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
		"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
	// Get At Least One Exact Hostname Match from JSON Resources Array in Database
	query = query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)

	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Errorf("Error while running query: %v", err)
		return nil, err
	}

	items := make([]*model.JobLink, 0, 10)
	queryString := fmt.Sprintf("cluster=%s", job.Cluster)

	for rows.Next() {
		var id, jobId, startTime sql.NullInt64

		if err = rows.Scan(&id, &jobId, &startTime); err != nil {
			log.Warn("Error while scanning rows")
			return nil, err
		}

		if id.Valid {
			queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
			items = append(items,
				&model.JobLink{
					ID:    fmt.Sprint(id.Int64),
					JobID: int(jobId.Int64),
				})
		}
	}

	rows, err = queryRunning.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Errorf("Error while running query: %v", err)
		return nil, err
	}

	for rows.Next() {
		var id, jobId, startTime sql.NullInt64

		if err := rows.Scan(&id, &jobId, &startTime); err != nil {
			log.Warn("Error while scanning rows")
			return nil, err
		}

		if id.Valid {
			queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
			items = append(items,
				&model.JobLink{
					ID:    fmt.Sprint(id.Int64),
					JobID: int(jobId.Int64),
				})
		}
	}

	cnt := len(items)

	return &model.JobLinkResultList{
		ListQuery: &queryString,
		Items:     items,
		Count:     &cnt,
	}, nil
}

internal/repository/jobQuery.go (Normal file, 349 lines)
@@ -0,0 +1,349 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository

import (
	"context"
	"errors"
	"fmt"
	"regexp"
	"strings"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	sq "github.com/Masterminds/squirrel"
)

func (r *JobRepository) QueryJobs(
	ctx context.Context,
	filters []*model.JobFilter,
	page *model.PageRequest,
	order *model.OrderByInput,
) ([]*schema.Job, error) {
	query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
	if qerr != nil {
		return nil, qerr
	}

	if order != nil {
		field := toSnakeCase(order.Field)
		if order.Type == "col" {
			// "col": Fixed column name query
			switch order.Order {
			case model.SortDirectionEnumAsc:
				query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
			case model.SortDirectionEnumDesc:
				query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
			default:
				return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
			}
		} else {
			// "foot": Order by footprint JSON field values
			// Verify and Search Only in Valid Jsons
			query = query.Where("JSON_VALID(meta_data)")
			switch order.Order {
			case model.SortDirectionEnumAsc:
				query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
			case model.SortDirectionEnumDesc:
				query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
			default:
				return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
			}
		}
	}

	if page != nil && page.ItemsPerPage != -1 {
		limit := uint64(page.ItemsPerPage)
		query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
	}

	for _, f := range filters {
		query = BuildWhereClause(f, query)
	}

	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		queryString, queryVars, _ := query.ToSql()
		log.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
		return nil, err
	}

	jobs := make([]*schema.Job, 0, 50)
	for rows.Next() {
		job, err := scanJob(rows)
		if err != nil {
			rows.Close()
			log.Warn("Error while scanning rows (Jobs)")
			return nil, err
		}
		jobs = append(jobs, job)
	}

	return jobs, nil
}

func (r *JobRepository) CountJobs(
	ctx context.Context,
	filters []*model.JobFilter,
) (int, error) {
	// DISTINCT count for tags filters, does not affect other queries
	query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job"))
	if qerr != nil {
		return 0, qerr
	}

	for _, f := range filters {
		query = BuildWhereClause(f, query)
	}

	var count int
	if err := query.RunWith(r.DB).Scan(&count); err != nil {
		return 0, err
	}

	return count, nil
}

func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
	if user == nil {
		var qnil sq.SelectBuilder
		return qnil, fmt.Errorf("user context is nil")
	}

	switch {
	case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs
		return query, nil
	case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs
		return query, nil
	case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
		if len(user.Projects) != 0 {
			return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
		} else {
			log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
			return query.Where("job.hpc_user = ?", user.Username), nil
		}
	case user.HasRole(schema.RoleUser): // User : Only personal jobs
		return query.Where("job.hpc_user = ?", user.Username), nil
	default: // No known Role, return error
		var qnil sq.SelectBuilder
		return qnil, fmt.Errorf("user has no or unknown roles")
	}
}

func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
	user := GetUserFromContext(ctx)

	return SecurityCheckWithUser(user, query)
}
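
The switch only ever narrows the query, never widens it. For illustration, the manager-without-projects fallback reduces to a personal-jobs filter; a self-contained sketch of the resulting SQL shape (squirrel only, username hypothetical):

	package main

	import (
		"fmt"

		sq "github.com/Masterminds/squirrel"
	)

	func main() {
		// Mirrors the RoleManager fallback: no managed projects, so personal jobs only.
		q := sq.Select("job.id").From("job").Where("job.hpc_user = ?", "alice")
		sql, args, _ := q.ToSql()
		fmt.Println(sql, args)
		// SELECT job.id FROM job WHERE job.hpc_user = ? [alice]
	}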

// Build a sq.SelectBuilder out of a model.JobFilter.
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
	if filter.Tags != nil {
		// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
		query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
	}
	if filter.DbID != nil {
		dbIDs := make([]string, len(filter.DbID))
		for i, val := range filter.DbID {
			dbIDs[i] = val
		}
		query = query.Where(sq.Eq{"job.id": dbIDs})
	}
	if filter.JobID != nil {
		query = buildStringCondition("job.job_id", filter.JobID, query)
	}
	if filter.ArrayJobID != nil {
		query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
	}
	if filter.User != nil {
		query = buildStringCondition("job.hpc_user", filter.User, query)
	}
	if filter.Project != nil {
		query = buildStringCondition("job.project", filter.Project, query)
	}
	if filter.JobName != nil {
		query = buildMetaJsonCondition("jobName", filter.JobName, query)
	}
	if filter.Cluster != nil {
		query = buildStringCondition("job.cluster", filter.Cluster, query)
	}
	if filter.Partition != nil {
		query = buildStringCondition("job.cluster_partition", filter.Partition, query)
	}
	if filter.StartTime != nil {
		query = buildTimeCondition("job.start_time", filter.StartTime, query)
	}
	if filter.Duration != nil {
		query = buildIntCondition("job.duration", filter.Duration, query)
	}
	if filter.MinRunningFor != nil {
		now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs.
		query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
	}
	if filter.Exclusive != nil {
		query = query.Where("job.exclusive = ?", *filter.Exclusive)
	}
	if filter.State != nil {
		states := make([]string, len(filter.State))
		for i, val := range filter.State {
			states[i] = string(val)
		}

		query = query.Where(sq.Eq{"job.job_state": states})
	}
	if filter.NumNodes != nil {
		query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
	}
	if filter.NumAccelerators != nil {
		query = buildIntCondition("job.num_acc", filter.NumAccelerators, query)
	}
	if filter.NumHWThreads != nil {
		query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query)
	}
	if filter.Node != nil {
		query = buildResourceJsonCondition("hostname", filter.Node, query)
	}
	if filter.Energy != nil {
		query = buildFloatCondition("job.energy", filter.Energy, query)
	}
	if filter.MetricStats != nil {
		for _, ms := range filter.MetricStats {
			query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
		}
	}
	return query
}
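
Because every branch returns the extended builder, filters compose by folding over the list, exactly as QueryJobs does. A hedged usage sketch (literal values hypothetical; field types as used by the function above):

	// Hypothetical call site for BuildWhereClause.
	cluster := "fritz"
	filters := []*model.JobFilter{
		{Cluster: &model.StringInput{Eq: &cluster}},
		{NumNodes: &schema.IntRange{From: 2, To: 64}},
	}

	query := sq.Select(jobColumns...).From("job")
	for _, f := range filters {
		query = BuildWhereClause(f, query)
	}
	// -> ... WHERE job.cluster = ? AND job.num_nodes BETWEEN ? AND ?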

func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
	return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}

func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
	return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}

func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
	if cond.From != nil && cond.To != nil {
		return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
	} else if cond.From != nil {
		return query.Where("? <= "+field, cond.From.Unix())
	} else if cond.To != nil {
		return query.Where(field+" <= ?", cond.To.Unix())
	} else if cond.Range != "" {
		now := time.Now().Unix()
		var then int64
		switch cond.Range {
		case "last6h":
			then = now - (60 * 60 * 6)
		case "last24h":
			then = now - (60 * 60 * 24)
		case "last7d":
			then = now - (60 * 60 * 24 * 7)
		case "last30d":
			then = now - (60 * 60 * 24 * 30)
		default:
			log.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
			return query
		}
		return query.Where(field+" BETWEEN ? AND ?", then, now)
	} else {
		return query
	}
}

func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
	// Verify and Search Only in Valid Jsons
	query = query.Where("JSON_VALID(footprint)")
	return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To)
}

func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
	if cond.Eq != nil {
		return query.Where(field+" = ?", *cond.Eq)
	}
	if cond.Neq != nil {
		return query.Where(field+" != ?", *cond.Neq)
	}
	if cond.StartsWith != nil {
		return query.Where(field+" LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
	}
	if cond.EndsWith != nil {
		return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.EndsWith))
	}
	if cond.Contains != nil {
		return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
	}
	if cond.In != nil {
		queryElements := make([]string, len(cond.In))
		copy(queryElements, cond.In)
		return query.Where(sq.Or{sq.Eq{field: queryElements}})
	}
	return query
}

func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
	// Verify and Search Only in Valid Jsons
	query = query.Where("JSON_VALID(meta_data)")
	// add "AND" Sql query Block for field match
	if cond.Eq != nil {
		return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") = ?", *cond.Eq)
	}
	if cond.Neq != nil {
		return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") != ?", *cond.Neq)
	}
	if cond.StartsWith != nil {
		return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
	}
	if cond.EndsWith != nil {
		return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") LIKE ?", fmt.Sprint("%", *cond.EndsWith))
	}
	if cond.Contains != nil {
		return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
	}
	return query
}

func buildResourceJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
	// Verify and Search Only in Valid Jsons
	query = query.Where("JSON_VALID(resources)")
	// add "AND" Sql query Block for field match
	if cond.Eq != nil {
		return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") = ?)", *cond.Eq)
	}
	if cond.Neq != nil { // Currently Unused
		return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") != ?)", *cond.Neq)
	}
	if cond.StartsWith != nil { // Currently Unused
		return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") LIKE ?)", fmt.Sprint(*cond.StartsWith, "%"))
	}
	if cond.EndsWith != nil { // Currently Unused
		return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") LIKE ?)", fmt.Sprint("%", *cond.EndsWith))
	}
	if cond.Contains != nil {
		return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") LIKE ?)", fmt.Sprint("%", *cond.Contains, "%"))
	}
	return query
}

var (
	matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
	matchAllCap   = regexp.MustCompile("([a-z0-9])([A-Z])")
)

func toSnakeCase(str string) string {
	for _, c := range str {
		if c == '\'' || c == '\\' {
			log.Panic("toSnakeCase() attack vector!")
		}
	}

	str = strings.ReplaceAll(str, "'", "")
	str = strings.ReplaceAll(str, "\\", "")
	snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
	snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
	return strings.ToLower(snake)
}
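
The two regex passes split camelCase API sort fields before lowercasing, which is how names like numNodes map onto the num_nodes column. A self-contained check of that behavior (the quote/backslash sanitizing above is omitted for brevity):

	package main

	import (
		"fmt"
		"regexp"
		"strings"
	)

	var (
		firstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
		allCap   = regexp.MustCompile("([a-z0-9])([A-Z])")
	)

	func toSnake(s string) string {
		s = firstCap.ReplaceAllString(s, "${1}_${2}")
		s = allCap.ReplaceAllString(s, "${1}_${2}")
		return strings.ToLower(s)
	}

	func main() {
		fmt.Println(toSnake("numNodes"))  // num_nodes
		fmt.Println(toSnake("startTime")) // start_time
	}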

@@ -5,9 +5,11 @@
package repository

import (
	"context"
	"fmt"
	"testing"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	_ "github.com/mattn/go-sqlite3"
)

@@ -28,8 +30,10 @@ func TestFind(t *testing.T) {
func TestFindById(t *testing.T) {
	r := setup(t)

	job, err := r.FindById(5)
	noErr(t, err)
	job, err := r.FindById(getContext(t), 5)
	if err != nil {
		t.Fatal(err)
	}

	// fmt.Printf("%+v", job)

@@ -41,8 +45,22 @@ func TestFindById(t *testing.T) {
func TestGetTags(t *testing.T) {
	r := setup(t)

	tags, counts, err := r.CountTags(nil)
	noErr(t, err)
	const contextUserKey ContextKey = "user"
	contextUserValue := &schema.User{
		Username:   "testuser",
		Projects:   make([]string, 0),
		Roles:      []string{"user"},
		AuthType:   0,
		AuthSource: 2,
	}

	ctx := context.WithValue(getContext(t), contextUserKey, contextUserValue)

	// Test Tag has Scope "global"
	tags, counts, err := r.CountTags(GetUserFromContext(ctx))
	if err != nil {
		t.Fatal(err)
	}

	fmt.Printf("TAGS %+v \n", tags)
	// fmt.Printf("COUNTS %+v \n", counts)
@@ -16,7 +16,7 @@ import (
	"github.com/golang-migrate/migrate/v4/source/iofs"
)

const Version uint = 7
const Version uint = 8

//go:embed migrations/*
var migrationFiles embed.FS
@@ -54,7 +54,7 @@ func checkDBVersion(backend string, db *sql.DB) error {
			return err
		}
	default:
		log.Fatalf("unsupported database backend: %s", backend)
		log.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
	}

	v, dirty, err := m.Version()
@@ -102,7 +102,7 @@ func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err erro
			return m, err
		}
	default:
		log.Fatalf("unsupported database backend: %s", backend)
		log.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
	}

	return m, nil
@@ -114,6 +114,14 @@ func MigrateDB(backend string, db string) error {
		return err
	}

	v, dirty, err := m.Version()

	log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)

	if dirty {
		return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
	}

	if err := m.Up(); err != nil {
		if err == migrate.ErrNoChange {
			log.Info("DB already up to date!")

internal/repository/migrations/mysql/08_add-footprint.down.sql (Normal file, 83 lines)
@@ -0,0 +1,83 @@
ALTER TABLE job DROP energy;
ALTER TABLE job DROP energy_footprint;
ALTER TABLE job ADD COLUMN flops_any_avg;
ALTER TABLE job ADD COLUMN mem_bw_avg;
ALTER TABLE job ADD COLUMN mem_used_max;
ALTER TABLE job ADD COLUMN load_avg;
ALTER TABLE job ADD COLUMN net_bw_avg;
ALTER TABLE job ADD COLUMN net_data_vol_total;
ALTER TABLE job ADD COLUMN file_bw_avg;
ALTER TABLE job ADD COLUMN file_data_vol_total;

UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');

ALTER TABLE job DROP footprint;
-- Do not use reserved keywords anymore
RENAME TABLE hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`;

DROP INDEX IF EXISTS jobs_cluster;
DROP INDEX IF EXISTS jobs_cluster_user;
DROP INDEX IF EXISTS jobs_cluster_project;
DROP INDEX IF EXISTS jobs_cluster_subcluster;
DROP INDEX IF EXISTS jobs_cluster_starttime;
DROP INDEX IF EXISTS jobs_cluster_duration;
DROP INDEX IF EXISTS jobs_cluster_numnodes;

DROP INDEX IF EXISTS jobs_cluster_partition;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;

DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;

DROP INDEX IF EXISTS jobs_cluster_jobstate;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;

DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;

DROP INDEX IF EXISTS jobs_user;
DROP INDEX IF EXISTS jobs_user_starttime;
DROP INDEX IF EXISTS jobs_user_duration;
DROP INDEX IF EXISTS jobs_user_numnodes;

DROP INDEX IF EXISTS jobs_project;
DROP INDEX IF EXISTS jobs_project_user;
DROP INDEX IF EXISTS jobs_project_starttime;
DROP INDEX IF EXISTS jobs_project_duration;
DROP INDEX IF EXISTS jobs_project_numnodes;

DROP INDEX IF EXISTS jobs_jobstate;
DROP INDEX IF EXISTS jobs_jobstate_user;
DROP INDEX IF EXISTS jobs_jobstate_project;
DROP INDEX IF EXISTS jobs_jobstate_starttime;
DROP INDEX IF EXISTS jobs_jobstate_duration;
DROP INDEX IF EXISTS jobs_jobstate_numnodes;

DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;

DROP INDEX IF EXISTS jobs_starttime;
DROP INDEX IF EXISTS jobs_duration;
DROP INDEX IF EXISTS jobs_numnodes;

DROP INDEX IF EXISTS jobs_duration_starttime;
DROP INDEX IF EXISTS jobs_numnodes_starttime;
DROP INDEX IF EXISTS jobs_numacc_starttime;
DROP INDEX IF EXISTS jobs_energy_starttime;

internal/repository/migrations/mysql/08_add-footprint.up.sql (Normal file, 123 lines)
@@ -0,0 +1,123 @@
DROP INDEX IF EXISTS job_stats ON job;
DROP INDEX IF EXISTS job_by_user ON job;
DROP INDEX IF EXISTS job_by_starttime ON job;
DROP INDEX IF EXISTS job_by_job_id ON job;
DROP INDEX IF EXISTS job_list ON job;
DROP INDEX IF EXISTS job_list_user ON job;
DROP INDEX IF EXISTS job_list_users ON job;
DROP INDEX IF EXISTS job_list_users_start ON job;

ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint JSON;

ALTER TABLE job ADD COLUMN footprint JSON;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';

-- Do not use reserved keywords anymore
RENAME TABLE `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition;

ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50);
ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN project VARCHAR(50);
ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50);
ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25);

UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;

ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;

-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);

-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);

-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);

-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);

-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);

-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);

-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);

-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);

-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);

-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);

-- Optimize DB index usage

internal/repository/migrations/sqlite3/08_add-footprint.down.sql (Normal file, 103 lines)
@@ -0,0 +1,103 @@
ALTER TABLE job DROP energy;
ALTER TABLE job DROP energy_footprint;
ALTER TABLE job ADD COLUMN flops_any_avg;
ALTER TABLE job ADD COLUMN mem_bw_avg;
ALTER TABLE job ADD COLUMN mem_used_max;
ALTER TABLE job ADD COLUMN load_avg;
ALTER TABLE job ADD COLUMN net_bw_avg;
ALTER TABLE job ADD COLUMN net_data_vol_total;
ALTER TABLE job ADD COLUMN file_bw_avg;
ALTER TABLE job ADD COLUMN file_data_vol_total;

UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');

ALTER TABLE job DROP footprint;

DROP INDEX IF EXISTS jobs_cluster;
DROP INDEX IF EXISTS jobs_cluster_user;
DROP INDEX IF EXISTS jobs_cluster_project;
DROP INDEX IF EXISTS jobs_cluster_subcluster;
DROP INDEX IF EXISTS jobs_cluster_starttime;
DROP INDEX IF EXISTS jobs_cluster_duration;
DROP INDEX IF EXISTS jobs_cluster_numnodes;
DROP INDEX IF EXISTS jobs_cluster_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_numacc;
DROP INDEX IF EXISTS jobs_cluster_energy;

DROP INDEX IF EXISTS jobs_cluster_partition;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_energy;

DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_energy;

DROP INDEX IF EXISTS jobs_cluster_jobstate;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;

DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_jobstate_energy;

DROP INDEX IF EXISTS jobs_user;
DROP INDEX IF EXISTS jobs_user_starttime;
DROP INDEX IF EXISTS jobs_user_duration;
DROP INDEX IF EXISTS jobs_user_numnodes;
DROP INDEX IF EXISTS jobs_user_numhwthreads;
DROP INDEX IF EXISTS jobs_user_numacc;
DROP INDEX IF EXISTS jobs_user_energy;

DROP INDEX IF EXISTS jobs_project;
DROP INDEX IF EXISTS jobs_project_user;
DROP INDEX IF EXISTS jobs_project_starttime;
DROP INDEX IF EXISTS jobs_project_duration;
DROP INDEX IF EXISTS jobs_project_numnodes;
DROP INDEX IF EXISTS jobs_project_numhwthreads;
DROP INDEX IF EXISTS jobs_project_numacc;
DROP INDEX IF EXISTS jobs_project_energy;

DROP INDEX IF EXISTS jobs_jobstate;
DROP INDEX IF EXISTS jobs_jobstate_user;
DROP INDEX IF EXISTS jobs_jobstate_project;
DROP INDEX IF EXISTS jobs_jobstate_starttime;
DROP INDEX IF EXISTS jobs_jobstate_duration;
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_jobstate_numacc;

DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;

DROP INDEX IF EXISTS jobs_starttime;
DROP INDEX IF EXISTS jobs_duration;
DROP INDEX IF EXISTS jobs_numnodes;
DROP INDEX IF EXISTS jobs_numhwthreads;
DROP INDEX IF EXISTS jobs_numacc;
DROP INDEX IF EXISTS jobs_energy;

DROP INDEX IF EXISTS jobs_duration_starttime;
DROP INDEX IF EXISTS jobs_numnodes_starttime;
DROP INDEX IF EXISTS jobs_numhwthreads_starttime;
DROP INDEX IF EXISTS jobs_numacc_starttime;
DROP INDEX IF EXISTS jobs_energy_starttime;
142
internal/repository/migrations/sqlite3/08_add-footprint.up.sql
Normal file
@ -0,0 +1,142 @@
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_list;
DROP INDEX IF EXISTS job_list_user;
DROP INDEX IF EXISTS job_list_users;
DROP INDEX IF EXISTS job_list_users_start;

ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;

ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';

-- Do not use reserved keywords anymore
ALTER TABLE "user" RENAME TO hpc_user;
ALTER TABLE job RENAME COLUMN "user" TO hpc_user;
ALTER TABLE job RENAME COLUMN "partition" TO cluster_partition;

UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;

ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;

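After this migration each job row carries its former scalar statistics as a single JSON document in the footprint column. A minimal sketch of reading it back from Go, assuming a plain database/sql connection; the database file name and job id are hypothetical, and this is not cc-backend's actual accessor code:

package main

import (
	"database/sql"
	"encoding/json"
	"fmt"
	"log"

	_ "github.com/mattn/go-sqlite3" // assumption: any sqlite3 driver works here
)

func main() {
	db, err := sql.Open("sqlite3", "./jobs.db") // hypothetical database file
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	var raw string
	// After the UPDATEs above, footprint holds e.g.
	// {"flops_any_avg": 3.1, "mem_bw_avg": 42.0, "mem_used_max": 12.5, "cpu_load_avg": 7.9}
	if err := db.QueryRow(`SELECT footprint FROM job WHERE id = ?`, 1).Scan(&raw); err != nil {
		log.Fatal(err)
	}

	fp := map[string]float64{}
	if err := json.Unmarshal([]byte(raw), &fp); err != nil {
		log.Fatal(err)
	}
	fmt.Println("flops_any_avg:", fp["flops_any_avg"])
}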
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);

-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);

-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);

-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);

-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);

-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);

-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);

-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);

-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);

-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, start_time);
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);

-- Optimize DB index usage
PRAGMA optimize;
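The composite indices above are keyed leftmost-first, so a filter on cluster, partition, and job state combined with a start-time sort can be served by jobs_cluster_partition_jobstate_starttime alone. A hedged sketch of checking that with SQLite's query planner from Go; the driver import, database path, and the cluster/partition values are assumptions for illustration:

package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/mattn/go-sqlite3" // assumption: any sqlite3 driver works
)

func main() {
	db, err := sql.Open("sqlite3", "./jobs.db") // hypothetical database file
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// EXPLAIN QUERY PLAN reports which index SQLite picks for this query shape.
	rows, err := db.Query(`EXPLAIN QUERY PLAN
		SELECT id FROM job
		WHERE cluster = ? AND cluster_partition = ? AND job_state = ?
		ORDER BY start_time DESC`, "fritz", "main", "completed")
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()

	for rows.Next() {
		var id, parent, notused int
		var detail string
		if err := rows.Scan(&id, &parent, &notused, &detail); err != nil {
			log.Fatal(err)
		}
		fmt.Println(detail) // expected to mention jobs_cluster_partition_jobstate_starttime
	}
}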
@ -1,253 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository

import (
	"context"
	"errors"
	"fmt"
	"regexp"
	"strings"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	sq "github.com/Masterminds/squirrel"
)

func (r *JobRepository) QueryJobs(
	ctx context.Context,
	filters []*model.JobFilter,
	page *model.PageRequest,
	order *model.OrderByInput) ([]*schema.Job, error) {

	query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
	if qerr != nil {
		return nil, qerr
	}

	if order != nil {
		field := toSnakeCase(order.Field)

		switch order.Order {
		case model.SortDirectionEnumAsc:
			query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
		case model.SortDirectionEnumDesc:
			query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
		default:
			return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
		}
	}

	if page != nil && page.ItemsPerPage != -1 {
		limit := uint64(page.ItemsPerPage)
		query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
	}

	for _, f := range filters {
		query = BuildWhereClause(f, query)
	}

	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Errorf("Error while running query: %v", err)
		return nil, err
	}

	jobs := make([]*schema.Job, 0, 50)
	for rows.Next() {
		job, err := scanJob(rows)
		if err != nil {
			rows.Close()
			log.Warn("Error while scanning rows (Jobs)")
			return nil, err
		}
		jobs = append(jobs, job)
	}

	return jobs, nil
}

func (r *JobRepository) CountJobs(
	ctx context.Context,
	filters []*model.JobFilter) (int, error) {

	query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
	if qerr != nil {
		return 0, qerr
	}

	for _, f := range filters {
		query = BuildWhereClause(f, query)
	}

	var count int
	if err := query.RunWith(r.DB).Scan(&count); err != nil {
		return 0, err
	}

	return count, nil
}

func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
	user := GetUserFromContext(ctx)
	if user == nil {
		var qnil sq.SelectBuilder
		return qnil, fmt.Errorf("user context is nil")
	} else if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleApi}) { // Admin & Co. : All jobs
		return query, nil
	} else if user.HasRole(schema.RoleManager) { // Manager : Add filter for managed projects' jobs only + personal jobs
		if len(user.Projects) != 0 {
			return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.user": user.Username}}), nil
		} else {
			log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
			return query.Where("job.user = ?", user.Username), nil
		}
	} else if user.HasRole(schema.RoleUser) { // User : Only personal jobs
		return query.Where("job.user = ?", user.Username), nil
	} else {
		// Short-term compatibility: Return User-Query if no roles:
		return query.Where("job.user = ?", user.Username), nil
		// // In the long term: Return Error instead of fallback:
		// var qnil sq.SelectBuilder
		// return qnil, fmt.Errorf("user '%s' with unknown roles [%#v]", user.Username, user.Roles)
	}
}
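SecurityCheck narrows every job query to what the calling user may see. A minimal sketch of the WHERE clauses it produces, rendered with squirrel directly; the username and project names are invented, and this bypasses the context plumbing the real code uses:

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	base := sq.Select("count(*)").From("job")

	// Manager with projects: managed projects' jobs plus personal jobs.
	managerQ := base.Where(sq.Or{
		sq.Eq{"job.project": []string{"projA", "projB"}}, // hypothetical projects
		sq.Eq{"job.user": "alice"},                       // hypothetical username
	})
	s, args, _ := managerQ.ToSql()
	fmt.Println(s)    // ... WHERE (job.project IN (?,?) OR job.user = ?)
	fmt.Println(args) // [projA projB alice]

	// Plain user: only personal jobs.
	userQ := base.Where("job.user = ?", "alice")
	s, args, _ = userQ.ToSql()
	fmt.Println(s, args)
}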
// Build a sq.SelectBuilder out of a schema.JobFilter.
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
	if filter.Tags != nil {
		query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags})
	}
	if filter.JobID != nil {
		query = buildStringCondition("job.job_id", filter.JobID, query)
	}
	if filter.ArrayJobID != nil {
		query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
	}
	if filter.User != nil {
		query = buildStringCondition("job.user", filter.User, query)
	}
	if filter.Project != nil {
		query = buildStringCondition("job.project", filter.Project, query)
	}
	if filter.JobName != nil {
		query = buildStringCondition("job.meta_data", filter.JobName, query)
	}
	if filter.Cluster != nil {
		query = buildStringCondition("job.cluster", filter.Cluster, query)
	}
	if filter.Partition != nil {
		query = buildStringCondition("job.partition", filter.Partition, query)
	}
	if filter.StartTime != nil {
		query = buildTimeCondition("job.start_time", filter.StartTime, query)
	}
	if filter.Duration != nil {
		now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs.
		query = query.Where("(CASE WHEN job.job_state = 'running' THEN (? - job.start_time) ELSE job.duration END) BETWEEN ? AND ?", now, filter.Duration.From, filter.Duration.To)
	}
	if filter.MinRunningFor != nil {
		now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs.
		query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
	}
	if filter.State != nil {
		states := make([]string, len(filter.State))
		for i, val := range filter.State {
			states[i] = string(val)
		}

		query = query.Where(sq.Eq{"job.job_state": states})
	}
	if filter.NumNodes != nil {
		query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
	}
	if filter.NumAccelerators != nil {
		query = buildIntCondition("job.num_acc", filter.NumAccelerators, query)
	}
	if filter.NumHWThreads != nil {
		query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query)
	}
	if filter.Node != nil {
		query = buildStringCondition("job.resources", filter.Node, query)
	}
	if filter.FlopsAnyAvg != nil {
		query = buildFloatCondition("job.flops_any_avg", filter.FlopsAnyAvg, query)
	}
	if filter.MemBwAvg != nil {
		query = buildFloatCondition("job.mem_bw_avg", filter.MemBwAvg, query)
	}
	if filter.LoadAvg != nil {
		query = buildFloatCondition("job.load_avg", filter.LoadAvg, query)
	}
	if filter.MemUsedMax != nil {
		query = buildFloatCondition("job.mem_used_max", filter.MemUsedMax, query)
	}
	return query
}

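The Duration filter above treats running jobs specially: their duration is computed on the fly as now - start_time inside the SQL CASE expression. A small illustrative check of that arithmetic in plain Go (the job timestamps are invented):

package main

import (
	"fmt"
	"time"
)

// effectiveDuration mirrors the CASE expression in BuildWhereClause:
// running jobs get now - start_time, finished jobs keep their stored duration.
func effectiveDuration(state string, startTime, storedDuration, now int64) int64 {
	if state == "running" {
		return now - startTime
	}
	return storedDuration
}

func main() {
	now := time.Now().Unix()
	started := now - 5400 // hypothetical job started 90 minutes ago

	fmt.Println(effectiveDuration("running", started, 0, now))      // 5400
	fmt.Println(effectiveDuration("completed", started, 3600, now)) // 3600
}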
func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
	return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}

func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
	if cond.From != nil && cond.To != nil {
		return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
	} else if cond.From != nil {
		return query.Where("? <= "+field, cond.From.Unix())
	} else if cond.To != nil {
		return query.Where(field+" <= ?", cond.To.Unix())
	} else {
		return query
	}
}

func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
	return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}

func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
	if cond.Eq != nil {
		return query.Where(field+" = ?", *cond.Eq)
	}
	if cond.Neq != nil {
		return query.Where(field+" != ?", *cond.Neq)
	}
	if cond.StartsWith != nil {
		return query.Where(field+" LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
	}
	if cond.EndsWith != nil {
		return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.EndsWith))
	}
	if cond.Contains != nil {
		return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
	}
	if cond.In != nil {
		queryElements := make([]string, len(cond.In))
		for i, val := range cond.In {
			queryElements[i] = val
		}
		return query.Where(sq.Or{sq.Eq{field: queryElements}})
	}
	return query
}

var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")

func toSnakeCase(str string) string {
	for _, c := range str {
		if c == '\'' || c == '\\' {
			log.Panic("toSnakeCase() attack vector!")
		}
	}

	str = strings.ReplaceAll(str, "'", "")
	str = strings.ReplaceAll(str, "\\", "")
	snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
	snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
	return strings.ToLower(snake)
}
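toSnakeCase maps GraphQL-style field names onto the snake_case column names used for ORDER BY. The two regexes can be exercised standalone; the inputs below are examples, not an exhaustive list:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")

// Same two-pass substitution as toSnakeCase above, minus the quote/backslash guard.
func toSnakeCase(str string) string {
	snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
	snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
	return strings.ToLower(snake)
}

func main() {
	fmt.Println(toSnakeCase("startTime")) // start_time
	fmt.Println(toSnakeCase("numAcc"))    // num_acc
	fmt.Println(toSnakeCase("duration"))  // duration (unchanged)
}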
@ -55,7 +55,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {

	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
-			_, err := db.FindById(jobId)
+			_, err := db.FindById(getContext(b), jobId)
			noErr(b, err)
		}
	})
@ -111,7 +111,7 @@ func BenchmarkDB_QueryJobs(b *testing.B) {
	user := "mppi133h"
	filter.User = &model.StringInput{Eq: &user}
	page := &model.PageRequest{ItemsPerPage: 50, Page: 1}
-	order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc}
+	order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}

	b.Run("QueryJobs", func(b *testing.B) {
		db := setup(b)

@ -8,12 +8,11 @@ import (
	"context"
	"database/sql"
	"fmt"
-	"math"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
-	"github.com/ClusterCockpit/cc-backend/internal/metricdata"
+	"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
@ -22,7 +21,7 @@ import (

// GraphQL validation should make sure that no unknown values can be specified.
var groupBy2column = map[model.Aggregate]string{
-	model.AggregateUser:    "job.user",
+	model.AggregateUser:    "job.hpc_user",
	model.AggregateProject: "job.project",
	model.AggregateCluster: "job.cluster",
}
@ -41,8 +40,8 @@ var sortBy2column = map[model.SortByAggregate]string{
func (r *JobRepository) buildCountQuery(
	filter []*model.JobFilter,
	kind string,
-	col string) sq.SelectBuilder {
-
+	col string,
+) sq.SelectBuilder {
	var query sq.SelectBuilder

	if col != "" {
@ -69,16 +68,16 @@ func (r *JobRepository) buildCountQuery(

func (r *JobRepository) buildStatsQuery(
	filter []*model.JobFilter,
-	col string) sq.SelectBuilder {
-
+	col string,
+) sq.SelectBuilder {
	var query sq.SelectBuilder
	castType := r.getCastType()

	// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)

	if col != "" {
-		// Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
-		query = sq.Select(col, "COUNT(job.id) as totalJobs",
+		// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
+		query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
			fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
@ -86,10 +85,9 @@ func (r *JobRepository) buildStatsQuery(
			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
			fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
-		).From("job").GroupBy(col)
-
+		).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
	} else {
-		// Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
+		// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
		query = sq.Select("COUNT(job.id)",
			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
			fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
@ -108,15 +106,15 @@ func (r *JobRepository) buildStatsQuery(
	return query
}

-func (r *JobRepository) getUserName(ctx context.Context, id string) string {
-	user := GetUserFromContext(ctx)
-	name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
-	if name != "" {
-		return name
-	} else {
-		return "-"
-	}
-}
+// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
+// 	user := GetUserFromContext(ctx)
+// 	name, _ := r.FindColumnValue(user, id, "hpc_user", "name", "username", false)
+// 	if name != "" {
+// 		return name
+// 	} else {
+// 		return "-"
+// 	}
+// }

func (r *JobRepository) getCastType() string {
	var castType string
@ -138,8 +136,8 @@ func (r *JobRepository) JobsStatsGrouped(
	filter []*model.JobFilter,
	page *model.PageRequest,
	sortBy *model.SortByAggregate,
-	groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
-
+	groupBy *model.Aggregate,
+) ([]*model.JobsStatistics, error) {
	start := time.Now()
	col := groupBy2column[*groupBy]
	query := r.buildStatsQuery(filter, col)
@ -168,14 +166,20 @@ func (r *JobRepository) JobsStatsGrouped(

	for rows.Next() {
		var id sql.NullString
+		var name sql.NullString
		var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
-		if err := rows.Scan(&id, &jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
+		if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
			log.Warn("Error while scanning rows")
			return nil, err
		}

		if id.Valid {
			var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
+			var personName string
+
+			if name.Valid {
+				personName = name.String
+			}

			if jobs.Valid {
				totalJobs = int(jobs.Int64)
@ -205,12 +209,12 @@ func (r *JobRepository) JobsStatsGrouped(
				totalAccHours = int(accHours.Int64)
			}

-			if col == "job.user" {
-				name := r.getUserName(ctx, id.String)
+			if col == "job.hpc_user" {
+				// name := r.getUserName(ctx, id.String)
				stats = append(stats,
					&model.JobsStatistics{
						ID:   id.String,
-						Name: name,
+						Name: personName,
						TotalJobs:     totalJobs,
						TotalWalltime: totalWalltime,
						TotalNodes:    totalNodes,
@ -218,7 +222,8 @@ func (r *JobRepository) JobsStatsGrouped(
						TotalCores:     totalCores,
						TotalCoreHours: totalCoreHours,
						TotalAccs:      totalAccs,
-						TotalAccHours:  totalAccHours})
+						TotalAccHours:  totalAccHours,
+					})
			} else {
				stats = append(stats,
					&model.JobsStatistics{
@ -230,7 +235,8 @@ func (r *JobRepository) JobsStatsGrouped(
						TotalCores:     totalCores,
						TotalCoreHours: totalCoreHours,
						TotalAccs:      totalAccs,
-						TotalAccHours:  totalAccHours})
+						TotalAccHours:  totalAccHours,
+					})
			}
		}
	}
@ -241,8 +247,8 @@ func (r *JobRepository) JobsStatsGrouped(

func (r *JobRepository) JobsStats(
	ctx context.Context,
-	filter []*model.JobFilter) ([]*model.JobsStatistics, error) {
-
+	filter []*model.JobFilter,
+) ([]*model.JobsStatistics, error) {
	start := time.Now()
	query := r.buildStatsQuery(filter, "")
	query, err := SecurityCheck(ctx, query)
@ -277,18 +283,36 @@ func (r *JobRepository) JobsStats(
			TotalWalltime:  int(walltime.Int64),
			TotalNodeHours: totalNodeHours,
			TotalCoreHours: totalCoreHours,
-			TotalAccHours:  totalAccHours})
+			TotalAccHours:  totalAccHours,
+		})
	}

	log.Debugf("Timer JobStats %s", time.Since(start))
	return stats, nil
}

func LoadJobStat(job *schema.JobMeta, metric string, statType string) float64 {
	if stats, ok := job.Statistics[metric]; ok {
		switch statType {
		case "avg":
			return stats.Avg
		case "max":
			return stats.Max
		case "min":
			return stats.Min
		default:
			log.Errorf("Unknown stat type %s", statType)
		}
	}

	return 0.0
}

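LoadJobStat picks one aggregate out of a job's archived statistics by metric name and stat kind. A hedged usage sketch; the struct types here are simplified stand-ins for cc-backend's actual schema package, and the numbers are invented:

package main

import "fmt"

// Simplified stand-ins for schema.JobStatistics / schema.JobMeta.
type JobStatistics struct{ Avg, Min, Max float64 }
type JobMeta struct{ Statistics map[string]JobStatistics }

// Mirrors the selection logic of LoadJobStat above.
func loadJobStat(job *JobMeta, metric string, statType string) float64 {
	if stats, ok := job.Statistics[metric]; ok {
		switch statType {
		case "avg":
			return stats.Avg
		case "max":
			return stats.Max
		case "min":
			return stats.Min
		}
	}
	return 0.0
}

func main() {
	job := &JobMeta{Statistics: map[string]JobStatistics{
		"mem_used": {Avg: 10.2, Min: 3.1, Max: 12.5}, // invented numbers
	}}
	fmt.Println(loadJobStat(job, "mem_used", "max"))  // 12.5
	fmt.Println(loadJobStat(job, "flops_any", "avg")) // 0 (metric missing)
}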
func (r *JobRepository) JobCountGrouped(
	ctx context.Context,
	filter []*model.JobFilter,
-	groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
-
+	groupBy *model.Aggregate,
+) ([]*model.JobsStatistics, error) {
	start := time.Now()
	col := groupBy2column[*groupBy]
	query := r.buildCountQuery(filter, "", col)
@ -315,7 +339,8 @@ func (r *JobRepository) JobCountGrouped(
			stats = append(stats,
				&model.JobsStatistics{
					ID:        id.String,
-					TotalJobs: int(cnt.Int64)})
+					TotalJobs: int(cnt.Int64),
+				})
		}
	}

@ -328,8 +353,8 @@ func (r *JobRepository) AddJobCountGrouped(
	filter []*model.JobFilter,
	groupBy *model.Aggregate,
	stats []*model.JobsStatistics,
-	kind string) ([]*model.JobsStatistics, error) {
-
+	kind string,
+) ([]*model.JobsStatistics, error) {
	start := time.Now()
	col := groupBy2column[*groupBy]
	query := r.buildCountQuery(filter, kind, col)
@ -376,8 +401,8 @@ func (r *JobRepository) AddJobCount(
	ctx context.Context,
	filter []*model.JobFilter,
	stats []*model.JobsStatistics,
-	kind string) ([]*model.JobsStatistics, error) {
-
+	kind string,
+) ([]*model.JobsStatistics, error) {
	start := time.Now()
	query := r.buildCountQuery(filter, kind, "")
	query, err := SecurityCheck(ctx, query)
@ -420,15 +445,41 @@ func (r *JobRepository) AddJobCount(
func (r *JobRepository) AddHistograms(
	ctx context.Context,
	filter []*model.JobFilter,
-	stat *model.JobsStatistics) (*model.JobsStatistics, error) {
+	stat *model.JobsStatistics,
+	durationBins *string,
+) (*model.JobsStatistics, error) {
	start := time.Now()

+	var targetBinCount int
+	var targetBinSize int
+	switch {
+	case *durationBins == "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
+		targetBinCount = 60
+		targetBinSize = 60
+	case *durationBins == "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
+		targetBinCount = 72
+		targetBinSize = 600
+	case *durationBins == "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
+		targetBinCount = 48
+		targetBinSize = 3600
+	case *durationBins == "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
+		targetBinCount = 12
+		targetBinSize = 21600
+	case *durationBins == "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
+		targetBinCount = 14
+		targetBinSize = 43200
+	default: // 24h
+		targetBinCount = 24
+		targetBinSize = 3600
+	}

	castType := r.getCastType()
	var err error
-	value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
-	stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter)
+	// Return X-Values always as seconds, will be formatted into minutes and hours in frontend
+	value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), targetBinSize, castType)
+	stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount)
	if err != nil {
-		log.Warn("Error while loading job statistics histogram: running jobs")
+		log.Warn("Error while loading job statistics histogram: job duration")
		return nil, err
	}

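The switch above fixes both the bin width and the bin count per zoom level, so the covered x-axis span is simply targetBinSize * targetBinCount. A quick sanity check of that arithmetic:

package main

import "fmt"

func main() {
	// {bin width in seconds, bin count}, per the switch in AddHistograms.
	levels := map[string][2]int{
		"1m":  {60, 60},    // 60 minutes total
		"10m": {600, 72},   // 12 hours total
		"1h":  {3600, 48},  // 48 hours total
		"6h":  {21600, 12}, // 3 days total
		"12h": {43200, 14}, // 7 days total
	}
	for name, l := range levels {
		span := l[0] * l[1]
		fmt.Printf("%s bins: %d s * %d bins = %d h total\n", name, l[0], l[1], span/3600)
	}
}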
@ -459,14 +510,16 @@ func (r *JobRepository) AddMetricHistograms(
	ctx context.Context,
	filter []*model.JobFilter,
	metrics []string,
-	stat *model.JobsStatistics) (*model.JobsStatistics, error) {
+	stat *model.JobsStatistics,
+	targetBinCount *int,
+) (*model.JobsStatistics, error) {
	start := time.Now()

	// Running Jobs Only: First query jobdata from sqlite, then query data and make bins
	for _, f := range filter {
		if f.State != nil {
			if len(f.State) == 1 && f.State[0] == "running" {
-				stat.HistMetrics = r.runningJobsMetricStatisticsHistogram(ctx, metrics, filter)
+				stat.HistMetrics = r.runningJobsMetricStatisticsHistogram(ctx, metrics, filter, targetBinCount)
				log.Debugf("Timer AddMetricHistograms %s", time.Since(start))
				return stat, nil
			}
@ -475,7 +528,7 @@ func (r *JobRepository) AddMetricHistograms(

	// All other cases: Query and make bins in sqlite directly
	for _, m := range metrics {
-		metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter)
+		metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter, targetBinCount)
		if err != nil {
			log.Warnf("Error while loading job metric statistics histogram: %s", m)
			continue
@ -491,8 +544,8 @@ func (r *JobRepository) AddMetricHistograms(
func (r *JobRepository) jobsStatisticsHistogram(
	ctx context.Context,
	value string,
-	filters []*model.JobFilter) ([]*model.HistoPoint, error) {
-
+	filters []*model.JobFilter,
+) ([]*model.HistoPoint, error) {
	start := time.Now()
	query, qerr := SecurityCheck(ctx,
		sq.Select(value, "COUNT(job.id) AS count").From("job"))
@ -512,6 +565,7 @@ func (r *JobRepository) jobsStatisticsHistogram(
	}

	points := make([]*model.HistoPoint, 0)
+	// is it possible to introduce zero values here? requires info about bincount
	for rows.Next() {
		point := model.HistoPoint{}
		if err := rows.Scan(&point.Value, &point.Count); err != nil {
@ -525,39 +579,79 @@ func (r *JobRepository) jobsStatisticsHistogram(
	return points, nil
}

+func (r *JobRepository) jobsDurationStatisticsHistogram(
+	ctx context.Context,
+	value string,
+	filters []*model.JobFilter,
+	binSizeSeconds int,
+	targetBinCount *int,
+) ([]*model.HistoPoint, error) {
+	start := time.Now()
+	query, qerr := SecurityCheck(ctx,
+		sq.Select(value, "COUNT(job.id) AS count").From("job"))
+
+	if qerr != nil {
+		return nil, qerr
+	}
+
+	// Setup Array
+	points := make([]*model.HistoPoint, 0)
+	for i := 1; i <= *targetBinCount; i++ {
+		point := model.HistoPoint{Value: i * binSizeSeconds, Count: 0}
+		points = append(points, &point)
+	}
+
+	for _, f := range filters {
+		query = BuildWhereClause(f, query)
+	}
+
+	rows, err := query.GroupBy("value").RunWith(r.DB).Query()
+	if err != nil {
+		log.Error("Error while running query")
+		return nil, err
+	}
+
+	// Fill Array at matching $Value
+	for rows.Next() {
+		point := model.HistoPoint{}
+		if err := rows.Scan(&point.Value, &point.Count); err != nil {
+			log.Warn("Error while scanning rows")
+			return nil, err
+		}
+
+		for _, e := range points {
+			if e.Value == (point.Value * binSizeSeconds) {
+				// Note:
+				// Matching on unmodified integer value (and multiplying point.Value by binSizeSeconds after match)
+				// causes frontend to loop into highest targetBinCount, due to zoom condition instantly being fulfilled (cause unknown)
+				e.Count = point.Count
+				break
+			}
+		}
+	}
+
+	log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
+	return points, nil
+}

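The duration histogram pre-fills one point per bin and then only overwrites counts for bins the GROUP BY actually returned, so empty bins stay visible as zeros. A standalone sketch of that fill pattern; the bin size is the "10m" level from above, the bin count is shortened, and the fake DB rows are invented:

package main

import "fmt"

type histoPoint struct{ Value, Count int }

func main() {
	const binSize = 600 // 10-minute bins, as in the "10m" zoom level
	const binCount = 6  // shortened from 72 for the example

	// Pre-fill: one zeroed point per bin, keyed by the bin's upper edge in seconds.
	points := make([]*histoPoint, 0, binCount)
	for i := 1; i <= binCount; i++ {
		points = append(points, &histoPoint{Value: i * binSize})
	}

	// Pretend these came back from GROUP BY: (bin number, count). Bins 2 and 5 only.
	dbRows := []histoPoint{{Value: 2, Count: 17}, {Value: 5, Count: 3}}
	for _, row := range dbRows {
		for _, e := range points {
			if e.Value == row.Value*binSize {
				e.Count = row.Count
				break
			}
		}
	}

	for _, p := range points {
		fmt.Printf("<= %4d s: %d jobs\n", p.Value, p.Count)
	}
}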
func (r *JobRepository) jobsMetricStatisticsHistogram(
	ctx context.Context,
	metric string,
-	filters []*model.JobFilter) (*model.MetricHistoPoints, error) {
-
-	var dbMetric string
-	switch metric {
-	case "cpu_load":
-		dbMetric = "load_avg"
-	case "flops_any":
-		dbMetric = "flops_any_avg"
-	case "mem_bw":
-		dbMetric = "mem_bw_avg"
-	case "mem_used":
-		dbMetric = "mem_used_max"
-	case "net_bw":
-		dbMetric = "net_bw_avg"
-	case "file_bw":
-		dbMetric = "file_bw_avg"
-	default:
-		return nil, fmt.Errorf("%s not implemented", metric)
-	}
-
+	filters []*model.JobFilter,
+	bins *int,
+) (*model.MetricHistoPoints, error) {
	// Get specific Peak or largest Peak
	var metricConfig *schema.MetricConfig
-	var peak float64 = 0.0
-	var unit string = ""
+	var peak float64
+	var unit string
+	var footprintStat string

	for _, f := range filters {
		if f.Cluster != nil {
			metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
			peak = metricConfig.Peak
			unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
+			footprintStat = metricConfig.Footprint
			log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
		}
	}
@ -572,58 +666,40 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
				if unit == "" {
					unit = m.Unit.Prefix + m.Unit.Base
				}
+				if footprintStat == "" {
+					footprintStat = m.Footprint
+				}
			}
		}
	}

-	// log.Debugf("Metric %s: DB %s, Peak %f, Unit %s", metric, dbMetric, peak, unit)
-	// Make bins, see https://jereze.com/code/sql-histogram/
-
+	// log.Debugf("Metric %s, Peak %f, Unit %s", metric, peak, unit)
+	// Make bins, see https://jereze.com/code/sql-histogram/ (Modified here)
	start := time.Now()

-	crossJoinQuery := sq.Select(
-		fmt.Sprintf(`max(%s) as max`, dbMetric),
-		fmt.Sprintf(`min(%s) as min`, dbMetric),
-	).From("job").Where(
-		fmt.Sprintf(`%s is not null`, dbMetric),
-	).Where(
-		fmt.Sprintf(`%s <= %f`, dbMetric, peak),
-	)
-
-	crossJoinQuery, cjqerr := SecurityCheck(ctx, crossJoinQuery)
-
-	if cjqerr != nil {
-		return nil, cjqerr
-	}
-
-	for _, f := range filters {
-		crossJoinQuery = BuildWhereClause(f, crossJoinQuery)
-	}
-
-	crossJoinQuerySql, crossJoinQueryArgs, sqlerr := crossJoinQuery.ToSql()
-	if sqlerr != nil {
-		return nil, sqlerr
-	}
-
-	bins := 10
-	binQuery := fmt.Sprintf(`CAST( (case when job.%s = value.max then value.max*0.999999999 else job.%s end - value.min) / (value.max - value.min) * %d as INTEGER )`, dbMetric, dbMetric, bins)
+	// Find Jobs' Value Bin Number: Divide Value by Peak, Multiply by RequestedBins, then CAST to INT: Gets Bin-Number of Job
+	binQuery := fmt.Sprintf(`CAST(
+		((case when json_extract(footprint, "$.%s") = %f then %f*0.999999999 else json_extract(footprint, "$.%s") end) / %f)
+		* %v as INTEGER )`,
+		(metric + "_" + footprintStat), peak, peak, (metric + "_" + footprintStat), peak, *bins)

	mainQuery := sq.Select(
		fmt.Sprintf(`%s + 1 as bin`, binQuery),
-		fmt.Sprintf(`count(job.%s) as count`, dbMetric),
-		fmt.Sprintf(`CAST(((value.max / %d) * (%s )) as INTEGER ) as min`, bins, binQuery),
-		fmt.Sprintf(`CAST(((value.max / %d) * (%s + 1 )) as INTEGER ) as max`, bins, binQuery),
-	).From("job").CrossJoin(
-		fmt.Sprintf(`(%s) as value`, crossJoinQuerySql), crossJoinQueryArgs...,
-	).Where(fmt.Sprintf(`job.%s is not null and job.%s <= %f`, dbMetric, dbMetric, peak))
+		fmt.Sprintf(`count(*) as count`),
+		// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * %s as min`, peak, *bins, binQuery),
+		// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * (%s + 1) as max`, peak, *bins, binQuery),
+	).From("job").Where(
+		"JSON_VALID(footprint)",
+	).Where(fmt.Sprintf(`json_extract(footprint, "$.%s") is not null and json_extract(footprint, "$.%s") <= %f`, (metric + "_" + footprintStat), (metric + "_" + footprintStat), peak))

	// Only accessible Jobs...
	mainQuery, qerr := SecurityCheck(ctx, mainQuery)

	if qerr != nil {
		return nil, qerr
	}

	// Filters...
	for _, f := range filters {
		mainQuery = BuildWhereClause(f, mainQuery)
	}
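The binQuery expression computes each job's bin as floor(value / peak * bins), with the value clamped just below peak so the maximum lands in the last bin instead of one past it. The same arithmetic in plain Go; the peak and the sample values are invented:

package main

import "fmt"

// binOf mirrors the SQL: CAST((clamped value / peak) * bins AS INTEGER).
func binOf(value, peak float64, bins int) int {
	if value == peak {
		value = peak * 0.999999999 // same clamp as the SQL CASE expression
	}
	return int(value / peak * float64(bins))
}

func main() {
	const peak = 100.0 // hypothetical metric peak from the cluster config
	const bins = 10

	for _, v := range []float64{0, 9.9, 10, 55, 100} {
		// +1 matches the `%s + 1 as bin` in mainQuery: bins are 1-based.
		fmt.Printf("value %6.1f -> bin %d\n", v, binOf(v, peak, bins)+1)
	}
}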
@ -637,18 +713,41 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
		return nil, err
	}

+	// Setup Return Array With Bin-Numbers for Match and Min/Max based on Peak
	points := make([]*model.MetricHistoPoint, 0)
-	for rows.Next() {
-		point := model.MetricHistoPoint{}
-		if err := rows.Scan(&point.Bin, &point.Count, &point.Min, &point.Max); err != nil {
-			log.Warnf("Error while scanning rows for %s", metric)
-			return nil, err // Totally bricks cc-backend if returned and if all metrics requested?
-		}
-
-		points = append(points, &point)
+	binStep := int(peak) / *bins
+	for i := 1; i <= *bins; i++ {
+		binMin := (binStep * (i - 1))
+		binMax := (binStep * i)
+		epoint := model.MetricHistoPoint{Bin: &i, Count: 0, Min: &binMin, Max: &binMax}
+		points = append(points, &epoint)
	}

-	result := model.MetricHistoPoints{Metric: metric, Unit: unit, Data: points}
+	for rows.Next() { // Fill Count if Bin-No. Matches (Not every Bin exists in DB!)
+		rpoint := model.MetricHistoPoint{}
+		if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil { // Required for Debug: &rpoint.Min, &rpoint.Max
+			log.Warnf("Error while scanning rows for %s", metric)
+			return nil, err // FIXME: Totally bricks cc-backend if returned and if all metrics requested?
+		}
+
+		for _, e := range points {
+			if e.Bin != nil && rpoint.Bin != nil {
+				if *e.Bin == *rpoint.Bin {
+					e.Count = rpoint.Count
+					// Only Required For Debug: Check DB returned Min/Max against Backend Init above
+					// if rpoint.Min != nil {
+					// 	log.Warnf(">>>> Bin %d Min Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Min, *e.Min)
+					// }
+					// if rpoint.Max != nil {
+					// 	log.Warnf(">>>> Bin %d Max Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Max, *e.Max)
+					// }
+					break
+				}
+			}
+		}
+	}
+
+	result := model.MetricHistoPoints{Metric: metric, Unit: unit, Stat: &footprintStat, Data: points}

	log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
	return &result, nil
@ -657,7 +756,9 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
	ctx context.Context,
	metrics []string,
-	filters []*model.JobFilter) []*model.MetricHistoPoints {
+	filters []*model.JobFilter,
+	bins *int,
+) []*model.MetricHistoPoints {

	// Get Jobs
	jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
@ -681,7 +782,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
			continue
		}

-		if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
+		if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
			log.Errorf("Error while loading averages for histogram: %s", err)
			return nil
		}
@ -692,15 +793,14 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
	for idx, metric := range metrics {
		// Get specific Peak or largest Peak
		var metricConfig *schema.MetricConfig
-		var peak float64 = 0.0
-		var unit string = ""
+		var peak float64
+		var unit string

		for _, f := range filters {
			if f.Cluster != nil {
				metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
				peak = metricConfig.Peak
				unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
				log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
			}
		}

@ -720,28 +820,24 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
		}

		// Make and fill bins
-		bins := 10.0
-		peakBin := peak / bins
+		peakBin := int(peak) / *bins

		points := make([]*model.MetricHistoPoint, 0)
-		for b := 0; b < 10; b++ {
+		for b := 0; b < *bins; b++ {
			count := 0
			bindex := b + 1
-			bmin := math.Round(peakBin * float64(b))
-			bmax := math.Round(peakBin * (float64(b) + 1.0))
+			bmin := peakBin * b
+			bmax := peakBin * (b + 1)

			// Iterate AVG values for indexed metric and count for bins
			for _, val := range avgs[idx] {
-				if float64(val) >= bmin && float64(val) < bmax {
+				if int(val) >= bmin && int(val) < bmax {
					count += 1
				}
			}

-			bminint := int(bmin)
-			bmaxint := int(bmax)
-
			// Append Bin to Metric Result Array
-			point := model.MetricHistoPoint{Bin: &bindex, Count: count, Min: &bminint, Max: &bmaxint}
+			point := model.MetricHistoPoint{Bin: &bindex, Count: count, Min: &bmin, Max: &bmax}
			points = append(points, &point)
		}

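Note that peakBin is now an integer division, so a peak that is not divisible by the bin count silently truncates the bin width: peak 100 with 10 bins gives bins of width 10 as expected, while peak 105 also gives width 10, leaving values above 100 outside every bin. A quick illustration of that edge (values are invented):

package main

import "fmt"

func main() {
	for _, peak := range []int{100, 105} {
		bins := 10
		peakBin := peak / bins // integer division, as in runningJobsMetricStatisticsHistogram
		bmin := peakBin * (bins - 1)
		bmax := peakBin * bins
		fmt.Printf("peak %d: last bin [%d, %d)\n", peak, bmin, bmax)
		// peak 100: last bin [90, 100)
		// peak 105: last bin [90, 100) -- values in [100, 105] are not counted
	}
}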
@ -5,6 +5,7 @@
package repository

import (
+	"fmt"
	"strings"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
@ -14,7 +15,13 @@ import (
)

// Add the tag with id `tagId` to the job with the database id `jobId`.
-func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
+func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
+	j, err := r.FindByIdWithUser(user, job)
+	if err != nil {
+		log.Warn("Error while finding job by id")
+		return nil, err
+	}

	q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)

	if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
@ -23,49 +30,153 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
		return nil, err
	}

-	j, err := r.FindById(job)
-	if err != nil {
-		log.Warn("Error while finding job by id")
-		return nil, err
-	}
-
-	tags, err := r.GetTags(&job)
+	tags, err := r.GetTags(user, &job)
	if err != nil {
		log.Warn("Error while getting tags for job")
		return nil, err
	}

-	return tags, archive.UpdateTags(j, tags)
+	archiveTags, err := r.getArchiveTags(&job)
+	if err != nil {
+		log.Warn("Error while getting tags for job")
+		return nil, err
+	}
+
+	return tags, archive.UpdateTags(j, archiveTags)
}

-// Removes a tag from a job
-func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
+// Removes a tag from a job by tag id
+func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
+	j, err := r.FindByIdWithUser(user, job)
+	if err != nil {
+		log.Warn("Error while finding job by id")
+		return nil, err
+	}
+
	q := sq.Delete("jobtag").Where("jobtag.job_id = ?", job).Where("jobtag.tag_id = ?", tag)

	if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
		s, _, _ := q.ToSql()
-		log.Errorf("Error adding tag with %s: %v", s, err)
+		log.Errorf("Error removing tag with %s: %v", s, err)
		return nil, err
	}

-	j, err := r.FindById(job)
-	if err != nil {
-		log.Warn("Error while finding job by id")
-		return nil, err
-	}
-
-	tags, err := r.GetTags(&job)
+	tags, err := r.GetTags(user, &job)
	if err != nil {
		log.Warn("Error while getting tags for job")
		return nil, err
	}

-	return tags, archive.UpdateTags(j, tags)
+	archiveTags, err := r.getArchiveTags(&job)
+	if err != nil {
+		log.Warn("Error while getting tags for job")
+		return nil, err
+	}
+
+	return tags, archive.UpdateTags(j, archiveTags)
}

+// Removes a tag from a job by tag info
+func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagType string, tagName string, tagScope string) ([]*schema.Tag, error) {
+	// Get Tag ID to delete
+	tagID, exists := r.TagId(tagType, tagName, tagScope)
+	if !exists {
+		log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
+		return nil, fmt.Errorf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
+	}
+
+	// Get Job
+	j, err := r.FindByIdWithUser(user, job)
+	if err != nil {
+		log.Warn("Error while finding job by id")
+		return nil, err
+	}
+
+	// Handle Delete
+	q := sq.Delete("jobtag").Where("jobtag.job_id = ?", job).Where("jobtag.tag_id = ?", tagID)
+
+	if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
+		s, _, _ := q.ToSql()
+		log.Errorf("Error removing tag from table 'jobTag' with %s: %v", s, err)
+		return nil, err
+	}
+
+	tags, err := r.GetTags(user, &job)
+	if err != nil {
+		log.Warn("Error while getting tags for job")
+		return nil, err
+	}
+
+	archiveTags, err := r.getArchiveTags(&job)
+	if err != nil {
+		log.Warn("Error while getting tags for job")
+		return nil, err
+	}
+
+	return tags, archive.UpdateTags(j, archiveTags)
+}
+
+// Removes a tag from db by tag info
+func (r *JobRepository) RemoveTagByRequest(tagType string, tagName string, tagScope string) error {
+	// Get Tag ID to delete
+	tagID, exists := r.TagId(tagType, tagName, tagScope)
+	if !exists {
+		log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
+		return fmt.Errorf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
+	}
+
+	// Handle Delete JobTagTable
+	qJobTag := sq.Delete("jobtag").Where("jobtag.tag_id = ?", tagID)
+
+	if _, err := qJobTag.RunWith(r.stmtCache).Exec(); err != nil {
+		s, _, _ := qJobTag.ToSql()
+		log.Errorf("Error removing tag from table 'jobTag' with %s: %v", s, err)
+		return err
+	}
+
+	// Handle Delete TagTable
+	qTag := sq.Delete("tag").Where("tag.id = ?", tagID)
+
+	if _, err := qTag.RunWith(r.stmtCache).Exec(); err != nil {
+		s, _, _ := qTag.ToSql()
+		log.Errorf("Error removing tag from table 'tag' with %s: %v", s, err)
+		return err
+	}
+
+	return nil
+}
+
+// Removes a tag from db by tag id
+func (r *JobRepository) RemoveTagById(tagID int64) error {
+	// Handle Delete JobTagTable
+	qJobTag := sq.Delete("jobtag").Where("jobtag.tag_id = ?", tagID)
+
+	if _, err := qJobTag.RunWith(r.stmtCache).Exec(); err != nil {
+		s, _, _ := qJobTag.ToSql()
+		log.Errorf("Error removing tag from table 'jobTag' with %s: %v", s, err)
+		return err
+	}
+
+	// Handle Delete TagTable
+	qTag := sq.Delete("tag").Where("tag.id = ?", tagID)
+
+	if _, err := qTag.RunWith(r.stmtCache).Exec(); err != nil {
+		s, _, _ := qTag.ToSql()
+		log.Errorf("Error removing tag from table 'tag' with %s: %v", s, err)
+		return err
+	}
+
+	return nil
+}

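These helpers call checkScopeAuth, which is referenced but not shown in this excerpt. A plausible reading of its rules, reconstructed only from how it is called here (global readable by everyone, a username scope only by that user, admin scope only by privileged roles); this sketch is an assumption, not cc-backend's actual implementation:

package main

import "fmt"

// Simplified stand-in for schema.User; roles reduced to one flag for brevity.
type user struct {
	username string
	isAdmin  bool
}

// checkScopeAuthSketch approximates the scope semantics implied by the calls
// above. Hypothetical: the real function also distinguishes read from write.
func checkScopeAuthSketch(u *user, operation string, scope string) bool {
	switch scope {
	case "global":
		return true
	case "admin":
		return u.isAdmin
	default: // private scope: the scope string is a username
		return u.username == scope
	}
}

func main() {
	alice := &user{username: "alice"}
	admin := &user{username: "ops", isAdmin: true}

	fmt.Println(checkScopeAuthSketch(alice, "read", "global")) // true
	fmt.Println(checkScopeAuthSketch(alice, "read", "bob"))    // false: someone else's private tag
	fmt.Println(checkScopeAuthSketch(alice, "read", "admin"))  // false
	fmt.Println(checkScopeAuthSketch(admin, "read", "admin"))  // true
}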
// CreateTag creates a new tag with the specified type and name and returns its database id.
-func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64, err error) {
-	q := sq.Insert("tag").Columns("tag_type", "tag_name").Values(tagType, tagName)
+func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
+	// Default to "Global" scope if none defined
+	if tagScope == "" {
+		tagScope = "global"
+	}
+
+	q := sq.Insert("tag").Columns("tag_type", "tag_name", "tag_scope").Values(tagType, tagName, tagScope)

	res, err := q.RunWith(r.stmtCache).Exec()
	if err != nil {
@ -78,8 +189,9 @@ func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64,
}

func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
+	// Fetch all Tags in DB for Display in Frontend Tag-View
	tags = make([]schema.Tag, 0, 100)
-	xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name FROM tag")
+	xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
	if err != nil {
		return nil, nil, err
	}
@ -89,22 +201,42 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
		if err = xrows.StructScan(&t); err != nil {
			return nil, nil, err
		}
-		tags = append(tags, t)
+
+		// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
+		readable, err := r.checkScopeAuth(user, "read", t.Scope)
+		if err != nil {
+			return nil, nil, err
+		}
+		if readable {
+			tags = append(tags, t)
+		}
	}

-	q := sq.Select("t.tag_name, count(jt.tag_id)").
+	// Query and Count Jobs with attached Tags
+	q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
		From("tag t").
		LeftJoin("jobtag jt ON t.id = jt.tag_id").
		GroupBy("t.tag_name")

+	// Handle Scope Filtering
+	scopeList := "\"global\""
+	if user != nil {
+		scopeList += ",\"" + user.Username + "\""
+	}
+	if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
+		scopeList += ",\"admin\""
+	}
+	q = q.Where("t.tag_scope IN (" + scopeList + ")")
+
+	// Handle Job Ownership
	if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
-		log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
+		// log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
+		// Unchanged: Needs to be own case still, due to UserRole/NoRole compatibility handling in else case
	} else if user != nil && user.HasRole(schema.RoleManager) { // MANAGER: Count own jobs plus project's jobs
+		// Build ("project1", "project2", ...) list of variable length directly in SQL string
-		q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
+		q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
	} else if user != nil { // USER OR NO ROLE (Compatibility): Only count own jobs
-		q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ?)", user.Username)
+		q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ?)", user.Username)
	}

	rows, err := q.RunWith(r.stmtCache).Query()
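scopeList is spliced into the SQL as a double-quoted, comma-separated literal list, so for an admin user named alice the filter becomes t.tag_scope IN ("global","alice","admin"). A small reproduction of that string building (the username is invented):

package main

import "fmt"

func main() {
	username := "alice" // hypothetical user
	isAdmin := true

	scopeList := "\"global\""
	scopeList += ",\"" + username + "\""
	if isAdmin {
		scopeList += ",\"admin\""
	}
	fmt.Println("t.tag_scope IN (" + scopeList + ")")
	// Output: t.tag_scope IN ("global","alice","admin")
}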
@ -115,29 +247,44 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
	counts = make(map[string]int)
	for rows.Next() {
		var tagName string
+		var tagId int
		var count int
-		if err = rows.Scan(&tagName, &count); err != nil {
+		if err = rows.Scan(&tagName, &tagId, &count); err != nil {
			return nil, nil, err
		}
-		counts[tagName] = count
+		// Use tagId as second Map-Key component to differentiate tags with identical names
+		counts[fmt.Sprint(tagName, tagId)] = count
	}
	err = rows.Err()

-	return
+	return tags, counts, err
}

// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
// If such a tag does not yet exist, it is created.
-func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName string) (tagId int64, err error) {
-	tagId, exists := r.TagId(tagType, tagName)
+func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
+	// Default to "Global" scope if none defined
+	if tagScope == "" {
+		tagScope = "global"
+	}
+
+	writable, err := r.checkScopeAuth(user, "write", tagScope)
+	if err != nil {
+		return 0, err
+	}
+	if !writable {
+		return 0, fmt.Errorf("cannot write tag scope with current authorization")
+	}
+
+	tagId, exists := r.TagId(tagType, tagName, tagScope)
	if !exists {
-		tagId, err = r.CreateTag(tagType, tagName)
+		tagId, err = r.CreateTag(tagType, tagName, tagScope)
		if err != nil {
			return 0, err
		}
	}

-	if _, err := r.AddTag(jobId, tagId); err != nil {
+	if _, err := r.AddTag(user, jobId, tagId); err != nil {
		return 0, err
	}

@ -158,19 +305,29 @@ func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool
|
||||
}
|
||||
|
||||
// TagId returns the database id of the tag with the specified type and name.
|
||||
func (r *JobRepository) TagId(tagType string, tagName string) (tagId int64, exists bool) {
|
||||
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
|
||||
exists = true
|
||||
if err := sq.Select("id").From("tag").
|
||||
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).
|
||||
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
|
||||
exists = false
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// GetTags returns a list of all tags if job is nil or of the tags that the job with that database ID has.
|
||||
func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name").From("tag")
|
||||
// TagInfo returns the database infos of the tag with the specified id.
|
||||
func (r *JobRepository) TagInfo(tagId int64) (tagType string, tagName string, tagScope string, exists bool) {
|
||||
exists = true
|
||||
if err := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").From("tag").Where("tag.id = ?", tagId).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&tagType, &tagName, &tagScope); err != nil {
|
||||
exists = false
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// GetTags returns a list of all scoped tags if job is nil or of the tags that the job with that database ID has.
|
||||
func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||
if job != nil {
|
||||
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||
}
|
||||
@ -185,7 +342,41 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
tags := make([]*schema.Tag, 0)
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
|
||||
readable, err := r.checkScopeAuth(user, "read", tag.Scope)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if readable {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
// getArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil, or of the tags that the job with that database ID has.
|
||||
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||
if job != nil {
|
||||
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error get tags with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags := make([]*schema.Tag, 0)
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
@ -194,3 +385,59 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
|
||||
// Import has no scope context: it only imports from the metafile into the DB (no recursive archive update required) and only returns err
|
||||
|
||||
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error adding tag on import with %s: %v", s, err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) checkScopeAuth(user *schema.User, operation string, scope string) (pass bool, err error) {
|
||||
if user != nil {
|
||||
switch {
|
||||
case operation == "write" && scope == "admin":
|
||||
if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
case operation == "write" && scope == "global":
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
case operation == "write" && scope == user.Username:
|
||||
return true, nil
|
||||
case operation == "read" && scope == "admin":
|
||||
return user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}), nil
|
||||
case operation == "read" && scope == "global":
|
||||
return true, nil
|
||||
case operation == "read" && scope == user.Username:
|
||||
return true, nil
|
||||
default:
|
||||
if operation == "read" || operation == "write" {
|
||||
// No acceptable scope: deny tag
|
||||
return false, nil
|
||||
} else {
|
||||
return false, fmt.Errorf("error while checking tag operation auth: unknown operation (%s)", operation)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return false, fmt.Errorf("error while checking tag operation auth: no user in context")
|
||||
}
|
||||
}
|
||||
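The decision table above compresses to: admins may write admin-scoped tags, admins and support may write global ones, and anyone may write tags scoped to their own username; reads follow the same pattern except that global is readable by everyone. A standalone sketch of that logic using plain string roles instead of schema.User (the API-only special case from the original is omitted here):

package main

import "fmt"

func canAccess(op, scope, username string, roles []string) bool {
	has := func(want ...string) bool {
		for _, r := range roles {
			for _, w := range want {
				if r == w {
					return true
				}
			}
		}
		return false
	}
	switch {
	case op == "write" && scope == "admin":
		return has("admin")
	case op == "write" && scope == "global":
		return has("admin", "support")
	case op == "read" && scope == "admin":
		return has("admin", "support")
	case op == "read" && scope == "global":
		return true
	case scope == username: // private scope: read and write allowed
		return true
	}
	return false
}

func main() {
	fmt.Println(canAccess("write", "global", "alice", []string{"user"})) // false
	fmt.Println(canAccess("read", "alice", "alice", []string{"user"}))   // true
}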
|
BIN
internal/repository/testdata/job.db
vendored
BIN
internal/repository/testdata/job.db
vendored
Binary file not shown.
BIN
internal/repository/testdata/job.db-shm
vendored
BIN
internal/repository/testdata/job.db-shm
vendored
Binary file not shown.
0
internal/repository/testdata/job.db-wal
vendored
0
internal/repository/testdata/job.db-wal
vendored
@ -6,7 +6,6 @@ package repository
|
||||
|
||||
import (
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
@ -18,20 +17,12 @@ type Transaction struct {
|
||||
func (r *JobRepository) TransactionInit() (*Transaction, error) {
|
||||
var err error
|
||||
t := new(Transaction)
|
||||
// Inserts are bundled into transactions because in sqlite,
|
||||
// that speeds up inserts A LOT.
|
||||
|
||||
t.tx, err = r.DB.Beginx()
|
||||
if err != nil {
|
||||
log.Warn("Error while bundling transactions")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t.stmt, err = t.tx.PrepareNamed(NamedJobInsert)
|
||||
if err != nil {
|
||||
log.Warn("Error while preparing namedJobInsert")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
@ -50,7 +41,6 @@ func (r *JobRepository) TransactionCommit(t *Transaction) error {
|
||||
return err
|
||||
}
|
||||
|
||||
t.stmt = t.tx.NamedStmt(t.stmt)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -59,14 +49,17 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error {
|
||||
log.Warn("Error while committing SQL transactions")
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, error) {
|
||||
res, err := t.stmt.Exec(job)
|
||||
func (r *JobRepository) TransactionAddNamed(
|
||||
t *Transaction,
|
||||
query string,
|
||||
args ...interface{},
|
||||
) (int64, error) {
|
||||
res, err := t.tx.NamedExec(query, args)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
log.Errorf("Named Exec failed: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@ -79,26 +72,19 @@ func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, e
|
||||
return id, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionAddTag(t *Transaction, tag *schema.Tag) (int64, error) {
|
||||
res, err := t.tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
|
||||
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
|
||||
|
||||
res, err := t.tx.Exec(query, args...)
|
||||
if err != nil {
|
||||
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
|
||||
return 0, err
|
||||
}
|
||||
tagId, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
log.Errorf("TransactionAdd(), Exec() Error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return tagId, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionSetTag(t *Transaction, jobId int64, tagId int64) error {
|
||||
if _, err := t.tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, jobId, tagId); err != nil {
|
||||
log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", jobId, tagId)
|
||||
return err
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Errorf("TransactionAdd(), LastInsertId() Error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return nil
|
||||
return id, nil
|
||||
}
|
||||
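The rationale for these transaction helpers, shown as a self-contained sketch: SQLite pays roughly one fsync per commit, so bundling a batch of inserts into a single transaction turns thousands of commits into one. The driver import here is an assumption for illustration only; the repository itself goes through sqlx.

package main

import (
	"database/sql"
	"log"

	_ "modernc.org/sqlite" // hypothetical driver choice for this sketch
)

func main() {
	db, err := sql.Open("sqlite", ":memory:")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if _, err := db.Exec(`CREATE TABLE tag (tag_name TEXT, tag_type TEXT, tag_scope TEXT)`); err != nil {
		log.Fatal(err)
	}

	tx, err := db.Begin() // one transaction for the whole batch
	if err != nil {
		log.Fatal(err)
	}
	stmt, err := tx.Prepare(`INSERT INTO tag (tag_name, tag_type, tag_scope) VALUES (?, ?, ?)`)
	if err != nil {
		log.Fatal(err)
	}
	for i := 0; i < 1000; i++ {
		if _, err := stmt.Exec("bandwidth", "bottleneck", "global"); err != nil {
			log.Fatal(err)
		}
	}
	if err := tx.Commit(); err != nil { // single commit, single fsync
		log.Fatal(err)
	}
}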
|
@ -19,6 +19,7 @@ import (
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/jmoiron/sqlx"
|
||||
"golang.org/x/crypto/bcrypt"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -46,8 +47,8 @@ func GetUserRepository() *UserRepository {
|
||||
func (r *UserRepository) GetUser(username string) (*schema.User, error) {
|
||||
user := &schema.User{Username: username}
|
||||
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
|
||||
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
|
||||
Where("user.username = ?", username).RunWith(r.DB).
|
||||
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("hpc_user").
|
||||
Where("hpc_user.username = ?", username).RunWith(r.DB).
|
||||
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
|
||||
log.Warnf("Error while querying user '%v' from database", username)
|
||||
return nil, err
|
||||
@ -72,9 +73,8 @@ func (r *UserRepository) GetUser(username string) (*schema.User, error) {
|
||||
}
|
||||
|
||||
func (r *UserRepository) GetLdapUsernames() ([]string, error) {
|
||||
|
||||
var users []string
|
||||
rows, err := r.DB.Query(`SELECT username FROM user WHERE user.ldap = 1`)
|
||||
rows, err := r.DB.Query(`SELECT username FROM hpc_user WHERE hpc_user.ldap = 1`)
|
||||
if err != nil {
|
||||
log.Warn("Error while querying usernames")
|
||||
return nil, err
|
||||
@ -122,18 +122,62 @@ func (r *UserRepository) AddUser(user *schema.User) error {
|
||||
vals = append(vals, int(user.AuthSource))
|
||||
}
|
||||
|
||||
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(r.DB).Exec(); err != nil {
|
||||
if _, err := sq.Insert("hpc_user").Columns(cols...).Values(vals...).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Infof("new user %#v created (roles: %s, auth-source: %d, projects: %s)", user.Username, rolesJson, user.AuthSource, projectsJson)
|
||||
|
||||
defaultMetricsCfg, err := config.LoadDefaultMetricsConfig()
|
||||
if err != nil {
|
||||
log.Errorf("Error loading default metrics config: %v", err)
|
||||
} else if defaultMetricsCfg != nil {
|
||||
for _, cluster := range defaultMetricsCfg.Clusters {
|
||||
metricsArray := config.ParseMetricsString(cluster.DefaultMetrics)
|
||||
metricsJSON, err := json.Marshal(metricsArray)
|
||||
if err != nil {
|
||||
log.Errorf("Error marshaling default metrics for cluster %s: %v", cluster.Name, err)
|
||||
continue
|
||||
}
|
||||
confKey := "job_view_selectedMetrics:" + cluster.Name
|
||||
if _, err := sq.Insert("configuration").
|
||||
Columns("username", "confkey", "value").
|
||||
Values(user.Username, confKey, string(metricsJSON)).
|
||||
RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error inserting default job view metrics for user %s and cluster %s: %v", user.Username, cluster.Name, err)
|
||||
} else {
|
||||
log.Infof("Default job view metrics for user %s and cluster %s set to %s", user.Username, cluster.Name, string(metricsJSON))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
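What the default-metrics block above persists, sketched with hypothetical cluster and metric names: one configuration row per cluster whose key is "job_view_selectedMetrics:<cluster>" and whose value is a JSON array of metric names.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	metrics := []string{"flops_any", "mem_bw"} // hypothetical defaults
	metricsJSON, err := json.Marshal(metrics)
	if err != nil {
		panic(err)
	}
	confKey := "job_view_selectedMetrics:" + "fritz" // hypothetical cluster name
	fmt.Println(confKey, "->", string(metricsJSON))
	// job_view_selectedMetrics:fritz -> ["flops_any","mem_bw"]
}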
|
||||
func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) error {
|
||||
// user contains updated info, apply to dbuser
|
||||
// TODO: Discuss updatable fields
|
||||
if dbUser.Name != user.Name {
|
||||
if _, err := sq.Update("hpc_user").Set("name", user.Name).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("error while updating name of user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Toggled until greenlit
|
||||
// if dbUser.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
|
||||
// projects, _ := json.Marshal(user.Projects)
|
||||
// if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||
// return err
|
||||
// }
|
||||
// }
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *UserRepository) DelUser(username string) error {
|
||||
|
||||
_, err := r.DB.Exec(`DELETE FROM user WHERE user.username = ?`, username)
|
||||
_, err := r.DB.Exec(`DELETE FROM hpc_user WHERE hpc_user.username = ?`, username)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting user '%s' from DB", username)
|
||||
return err
|
||||
@ -143,8 +187,7 @@ func (r *UserRepository) DelUser(username string) error {
|
||||
}
|
||||
|
||||
func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
||||
|
||||
q := sq.Select("username", "name", "email", "roles", "projects").From("user")
|
||||
q := sq.Select("username", "name", "email", "roles", "projects").From("hpc_user")
|
||||
if specialsOnly {
|
||||
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
|
||||
}
|
||||
@ -186,8 +229,8 @@ func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
||||
func (r *UserRepository) AddRole(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
queryrole string) error {
|
||||
|
||||
queryrole string,
|
||||
) error {
|
||||
newRole := strings.ToLower(queryrole)
|
||||
user, err := r.GetUser(username)
|
||||
if err != nil {
|
||||
@ -198,15 +241,15 @@ func (r *UserRepository) AddRole(
|
||||
exists, valid := user.HasValidRole(newRole)
|
||||
|
||||
if !valid {
|
||||
return fmt.Errorf("Supplied role is no valid option : %v", newRole)
|
||||
return fmt.Errorf("supplied role is no valid option : %v", newRole)
|
||||
}
|
||||
if exists {
|
||||
return fmt.Errorf("User %v already has role %v", username, newRole)
|
||||
return fmt.Errorf("user %v already has role %v", username, newRole)
|
||||
}
|
||||
|
||||
roles, _ := json.Marshal(append(user.Roles, newRole))
|
||||
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error while adding new role for user '%s'", user.Username)
|
||||
if _, err := sq.Update("hpc_user").Set("roles", roles).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("error while adding new role for user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
@ -223,14 +266,14 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
exists, valid := user.HasValidRole(oldRole)
|
||||
|
||||
if !valid {
|
||||
return fmt.Errorf("Supplied role is no valid option : %v", oldRole)
|
||||
return fmt.Errorf("supplied role is no valid option : %v", oldRole)
|
||||
}
|
||||
if !exists {
|
||||
return fmt.Errorf("Role already deleted for user '%v': %v", username, oldRole)
|
||||
return fmt.Errorf("role already deleted for user '%v': %v", username, oldRole)
|
||||
}
|
||||
|
||||
if oldRole == schema.GetRoleString(schema.RoleManager) && len(user.Projects) != 0 {
|
||||
return fmt.Errorf("Cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
|
||||
return fmt.Errorf("cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
|
||||
}
|
||||
|
||||
var newroles []string
|
||||
@ -240,8 +283,8 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
}
|
||||
}
|
||||
|
||||
var mroles, _ = json.Marshal(newroles)
|
||||
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
mroles, _ := json.Marshal(newroles)
|
||||
if _, err := sq.Update("hpc_user").Set("roles", mroles).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error while removing role for user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
@ -251,15 +294,15 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
func (r *UserRepository) AddProject(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
project string) error {
|
||||
|
||||
project string,
|
||||
) error {
|
||||
user, err := r.GetUser(username)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !user.HasRole(schema.RoleManager) {
|
||||
return fmt.Errorf("user '%s' is not a manager!", username)
|
||||
return fmt.Errorf("user '%s' is not a manager", username)
|
||||
}
|
||||
|
||||
if user.HasProject(project) {
|
||||
@ -267,7 +310,7 @@ func (r *UserRepository) AddProject(
|
||||
}
|
||||
|
||||
projects, _ := json.Marshal(append(user.Projects, project))
|
||||
if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@ -281,11 +324,11 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
|
||||
}
|
||||
|
||||
if !user.HasRole(schema.RoleManager) {
|
||||
return fmt.Errorf("user '%#v' is not a manager!", username)
|
||||
return fmt.Errorf("user '%#v' is not a manager", username)
|
||||
}
|
||||
|
||||
if !user.HasProject(project) {
|
||||
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match!", username, project)
|
||||
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match", username, project)
|
||||
}
|
||||
|
||||
var exists bool
|
||||
@ -298,14 +341,14 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
|
||||
}
|
||||
}
|
||||
|
||||
if exists == true {
|
||||
if exists {
|
||||
var result interface{}
|
||||
if len(newprojects) == 0 {
|
||||
result = "[]"
|
||||
} else {
|
||||
result, _ = json.Marshal(newprojects)
|
||||
}
|
||||
if _, err := sq.Update("user").Set("projects", result).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
if _, err := sq.Update("hpc_user").Set("projects", result).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
@ -321,9 +364,10 @@ const ContextUserKey ContextKey = "user"
|
||||
func GetUserFromContext(ctx context.Context) *schema.User {
|
||||
x := ctx.Value(ContextUserKey)
|
||||
if x == nil {
|
||||
log.Warnf("no user retrieved from context")
|
||||
return nil
|
||||
}
|
||||
|
||||
// log.Infof("user retrieved from context: %v", x.(*schema.User))
|
||||
return x.(*schema.User)
|
||||
}
|
||||
|
||||
@ -336,7 +380,7 @@ func (r *UserRepository) FetchUserInCtx(ctx context.Context, username string) (*
|
||||
|
||||
user := &model.User{Username: username}
|
||||
var name, email sql.NullString
|
||||
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
|
||||
if err := sq.Select("name", "email").From("hpc_user").Where("hpc_user.username = ?", username).
|
||||
RunWith(r.DB).QueryRow().Scan(&name, &email); err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */
|
||||
|
@ -24,9 +24,9 @@ var (
|
||||
type UserCfgRepo struct {
|
||||
DB *sqlx.DB
|
||||
Lookup *sqlx.Stmt
|
||||
lock sync.RWMutex
|
||||
uiDefaults map[string]interface{}
|
||||
cache *lrucache.Cache
|
||||
lock sync.RWMutex
|
||||
}
|
||||
|
||||
func GetUserCfgRepo() *UserCfgRepo {
|
||||
@ -35,7 +35,7 @@ func GetUserCfgRepo() *UserCfgRepo {
|
||||
|
||||
lookupConfigStmt, err := db.DB.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
|
||||
if err != nil {
|
||||
log.Fatalf("db.DB.Preparex() error: %v", err)
|
||||
log.Fatalf("User Config: Call 'db.DB.Preparex()' failed.\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
userCfgRepoInstance = &UserCfgRepo{
|
||||
@ -112,8 +112,8 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *schema.User) (map[string]interface{},
|
||||
// configuration.
|
||||
func (uCfg *UserCfgRepo) UpdateConfig(
|
||||
key, value string,
|
||||
user *schema.User) error {
|
||||
|
||||
user *schema.User,
|
||||
) error {
|
||||
if user == nil {
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(value), &val); err != nil {
|
||||
|
@ -25,6 +25,9 @@ func setupUserTest(t *testing.T) *UserCfgRepo {
|
||||
"jwts": {
|
||||
"max-age": "2m"
|
||||
},
|
||||
"apiAllowedIPs": [
|
||||
"*"
|
||||
],
|
||||
"clusters": [
|
||||
{
|
||||
"name": "testcluster",
|
||||
|
@ -13,6 +13,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
@ -34,32 +35,38 @@ type Route struct {
|
||||
|
||||
var routes []Route = []Route{
|
||||
{"/", "home.tmpl", "ClusterCockpit", false, setupHomeRoute},
|
||||
{"/config", "config.tmpl", "Settings", false, func(i InfoType, r *http.Request) InfoType { return i }},
|
||||
{"/config", "config.tmpl", "Settings", false, setupConfigRoute},
|
||||
{"/monitoring/jobs/", "monitoring/jobs.tmpl", "Jobs - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { return i }},
|
||||
{"/monitoring/job/{id:[0-9]+}", "monitoring/job.tmpl", "Job <ID> - ClusterCockpit", false, setupJobRoute},
|
||||
{"/monitoring/users/", "monitoring/list.tmpl", "Users - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { i["listType"] = "USER"; return i }},
|
||||
{"/monitoring/projects/", "monitoring/list.tmpl", "Projects - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { i["listType"] = "PROJECT"; return i }},
|
||||
{"/monitoring/tags/", "monitoring/taglist.tmpl", "Tags - ClusterCockpit", false, setupTaglistRoute},
|
||||
{"/monitoring/user/{id}", "monitoring/user.tmpl", "User <ID> - ClusterCockpit", true, setupUserRoute},
|
||||
{"/monitoring/systems/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> - ClusterCockpit", false, setupClusterRoute},
|
||||
{"/monitoring/systems/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> Node Overview - ClusterCockpit", false, setupClusterOverviewRoute},
|
||||
{"/monitoring/systems/list/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> Node List - ClusterCockpit", false, setupClusterListRoute},
|
||||
{"/monitoring/systems/list/{cluster}/{subcluster}", "monitoring/systems.tmpl", "Cluster <ID> <SID> Node List - ClusterCockpit", false, setupClusterListRoute},
|
||||
{"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute},
|
||||
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analysis - ClusterCockpit", true, setupAnalysisRoute},
|
||||
{"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterRoute},
|
||||
{"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterStatusRoute},
|
||||
}
|
||||
|
||||
func setupHomeRoute(i InfoType, r *http.Request) InfoType {
|
||||
jobRepo := repository.GetJobRepository()
|
||||
groupBy := model.AggregateCluster
|
||||
|
||||
// startJobCount := time.Now()
|
||||
stats, err := jobRepo.JobCountGrouped(r.Context(), nil, &groupBy)
|
||||
if err != nil {
|
||||
log.Warnf("failed to count jobs: %s", err.Error())
|
||||
}
|
||||
// log.Infof("Timer HOME ROUTE startJobCount: %s", time.Since(startJobCount))
|
||||
|
||||
// startRunningJobCount := time.Now()
|
||||
stats, err = jobRepo.AddJobCountGrouped(r.Context(), nil, &groupBy, stats, "running")
|
||||
if err != nil {
|
||||
log.Warnf("failed to count running jobs: %s", err.Error())
|
||||
}
|
||||
// log.Infof("Timer HOME ROUTE startRunningJobCount: %s", time.Since(startRunningJobCount))
|
||||
|
||||
i["clusters"] = stats
|
||||
|
||||
@ -75,8 +82,22 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
|
||||
return i
|
||||
}
|
||||
|
||||
func setupConfigRoute(i InfoType, r *http.Request) InfoType {
|
||||
if util.CheckFileExists("./var/notice.txt") {
|
||||
msg, err := os.ReadFile("./var/notice.txt")
|
||||
if err == nil {
|
||||
i["ncontent"] = string(msg)
|
||||
}
|
||||
}
|
||||
|
||||
return i
|
||||
}
|
||||
|
||||
func setupJobRoute(i InfoType, r *http.Request) InfoType {
|
||||
i["id"] = mux.Vars(r)["id"]
|
||||
if config.Keys.EmissionConstant != 0 {
|
||||
i["emission"] = config.Keys.EmissionConstant
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
@ -92,7 +113,7 @@ func setupUserRoute(i InfoType, r *http.Request) InfoType {
|
||||
return i
|
||||
}
|
||||
|
||||
func setupClusterRoute(i InfoType, r *http.Request) InfoType {
|
||||
func setupClusterStatusRoute(i InfoType, r *http.Request) InfoType {
|
||||
vars := mux.Vars(r)
|
||||
i["id"] = vars["cluster"]
|
||||
i["cluster"] = vars["cluster"]
|
||||
@ -104,6 +125,36 @@ func setupClusterRoute(i InfoType, r *http.Request) InfoType {
|
||||
return i
|
||||
}
|
||||
|
||||
func setupClusterOverviewRoute(i InfoType, r *http.Request) InfoType {
|
||||
vars := mux.Vars(r)
|
||||
i["id"] = vars["cluster"]
|
||||
i["cluster"] = vars["cluster"]
|
||||
i["displayType"] = "OVERVIEW"
|
||||
|
||||
from, to := r.URL.Query().Get("from"), r.URL.Query().Get("to")
|
||||
if from != "" || to != "" {
|
||||
i["from"] = from
|
||||
i["to"] = to
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func setupClusterListRoute(i InfoType, r *http.Request) InfoType {
|
||||
vars := mux.Vars(r)
|
||||
i["id"] = vars["cluster"]
|
||||
i["cluster"] = vars["cluster"]
|
||||
i["sid"] = vars["subcluster"]
|
||||
i["subCluster"] = vars["subcluster"]
|
||||
i["displayType"] = "LIST"
|
||||
|
||||
from, to := r.URL.Query().Get("from"), r.URL.Query().Get("to")
|
||||
if from != "" || to != "" {
|
||||
i["from"] = from
|
||||
i["to"] = to
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func setupNodeRoute(i InfoType, r *http.Request) InfoType {
|
||||
vars := mux.Vars(r)
|
||||
i["cluster"] = vars["cluster"]
|
||||
@ -124,28 +175,46 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType {
|
||||
|
||||
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
|
||||
jobRepo := repository.GetJobRepository()
|
||||
user := repository.GetUserFromContext(r.Context())
|
||||
|
||||
tags, counts, err := jobRepo.CountTags(user)
|
||||
tags, counts, err := jobRepo.CountTags(repository.GetUserFromContext(r.Context()))
|
||||
tagMap := make(map[string][]map[string]interface{})
|
||||
if err != nil {
|
||||
log.Warnf("GetTags failed: %s", err.Error())
|
||||
i["tagmap"] = tagMap
|
||||
return i
|
||||
}
|
||||
|
||||
for _, tag := range tags {
|
||||
tagItem := map[string]interface{}{
|
||||
"id": tag.ID,
|
||||
"name": tag.Name,
|
||||
"count": counts[tag.Name],
|
||||
// Reduces displayed tags for unauth'd users
|
||||
userAuthlevel := repository.GetUserFromContext(r.Context()).GetAuthLevel()
|
||||
// Uses tag.ID as second Map-Key component to differentiate tags with identical names
|
||||
if userAuthlevel >= 4 { // Support+ : Show tags for all scopes, regardless of count
|
||||
for _, tag := range tags {
|
||||
tagItem := map[string]interface{}{
|
||||
"id": tag.ID,
|
||||
"name": tag.Name,
|
||||
"scope": tag.Scope,
|
||||
"count": counts[fmt.Sprint(tag.Name, tag.ID)],
|
||||
}
|
||||
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
|
||||
}
|
||||
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
|
||||
}
|
||||
} else if userAuthlevel < 4 && userAuthlevel >= 2 { // User+ : Show global and admin scope only if at least 1 tag used, private scope regardless of count
|
||||
for _, tag := range tags {
|
||||
tagCount := counts[fmt.Sprint(tag.Name, tag.ID)]
|
||||
if ((tag.Scope == "global" || tag.Scope == "admin") && tagCount >= 1) || (tag.Scope != "global" && tag.Scope != "admin") {
|
||||
tagItem := map[string]interface{}{
|
||||
"id": tag.ID,
|
||||
"name": tag.Name,
|
||||
"scope": tag.Scope,
|
||||
"count": tagCount,
|
||||
}
|
||||
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
|
||||
}
|
||||
}
|
||||
} // auth < 2: return nothing for this route
|
||||
|
||||
i["tagmap"] = tagMap
|
||||
return i
|
||||
}
|
||||
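The gating above, restated: auth level 4 and above (support, admin) see every tag; levels 2 and 3 always see private tags but see global/admin tags only once they are attached to at least one job; below level 2 the tag map stays empty. A compact, self-contained sketch of that predicate (auth-level numbering as used above):

package main

import "fmt"

// visible reports whether a tag with the given scope and usage count is
// shown for a user with the given auth level (4+: support/admin, 2-3: user).
func visible(authLevel int, scope string, count int) bool {
	switch {
	case authLevel >= 4:
		return true
	case authLevel >= 2:
		if scope == "global" || scope == "admin" {
			return count >= 1
		}
		return true // private scope: always listed
	default:
		return false
	}
}

func main() {
	fmt.Println(visible(2, "global", 0)) // false: unused global tag hidden
	fmt.Println(visible(2, "alice", 0))  // true: private tag always shown
	fmt.Println(visible(4, "global", 0)) // true: support+ sees everything
}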
|
||||
// FIXME: Lots of redundant code. Needs refactoring
|
||||
func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||
filterPresets := map[string]interface{}{}
|
||||
|
||||
@ -208,6 +277,16 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||
}
|
||||
}
|
||||
}
|
||||
if query.Get("numHWThreads") != "" {
|
||||
parts := strings.Split(query.Get("numHWThreads"), "-")
|
||||
if len(parts) == 2 {
|
||||
a, e1 := strconv.Atoi(parts[0])
|
||||
b, e2 := strconv.Atoi(parts[1])
|
||||
if e1 == nil && e2 == nil {
|
||||
filterPresets["numHWThreads"] = map[string]int{"from": a, "to": b}
|
||||
}
|
||||
}
|
||||
}
|
||||
if query.Get("numAccelerators") != "" {
|
||||
parts := strings.Split(query.Get("numAccelerators"), "-")
|
||||
if len(parts) == 2 {
|
||||
@ -218,6 +297,9 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(query["dbId"]) != 0 {
|
||||
filterPresets["dbId"] = query["dbId"]
|
||||
}
|
||||
if query.Get("jobId") != "" {
|
||||
if len(query["jobId"]) == 1 {
|
||||
filterPresets["jobId"] = query.Get("jobId")
|
||||
@ -234,7 +316,7 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||
}
|
||||
if query.Get("startTime") != "" {
|
||||
parts := strings.Split(query.Get("startTime"), "-")
|
||||
if len(parts) == 2 {
|
||||
if len(parts) == 2 { // Time in seconds, from - to
|
||||
a, e1 := strconv.ParseInt(parts[0], 10, 64)
|
||||
b, e2 := strconv.ParseInt(parts[1], 10, 64)
|
||||
if e1 == nil && e2 == nil {
|
||||
@ -243,9 +325,41 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||
"to": time.Unix(b, 0).Format(time.RFC3339),
|
||||
}
|
||||
}
|
||||
} else { // named range
|
||||
filterPresets["startTime"] = map[string]string{
|
||||
"range": query.Get("startTime"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if query.Get("energy") != "" {
|
||||
parts := strings.Split(query.Get("energy"), "-")
|
||||
if len(parts) == 2 {
|
||||
a, e1 := strconv.Atoi(parts[0])
|
||||
b, e2 := strconv.Atoi(parts[1])
|
||||
if e1 == nil && e2 == nil {
|
||||
filterPresets["energy"] = map[string]int{"from": a, "to": b}
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(query["stat"]) != 0 {
|
||||
statList := make([]map[string]interface{}, 0)
|
||||
for _, statEntry := range query["stat"] {
|
||||
parts := strings.Split(statEntry, "-")
|
||||
if len(parts) == 3 { // Metric Footprint Stat Field, from - to
|
||||
a, e1 := strconv.ParseInt(parts[1], 10, 64)
|
||||
b, e2 := strconv.ParseInt(parts[2], 10, 64)
|
||||
if e1 == nil && e2 == nil {
|
||||
statEntry := map[string]interface{}{
|
||||
"field": parts[0],
|
||||
"from": a,
|
||||
"to": b,
|
||||
}
|
||||
statList = append(statList, statEntry)
|
||||
}
|
||||
}
|
||||
}
|
||||
filterPresets["stats"] = statList
|
||||
}
|
||||
return filterPresets
|
||||
}
|
||||
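For reference, the "stat" query values parsed above follow a three-part "<field>-<from>-<to>" format. A minimal sketch with a hypothetical footprint field name:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	parts := strings.Split("mem_used_max-10-60", "-") // hypothetical entry
	if len(parts) == 3 {
		from, e1 := strconv.ParseInt(parts[1], 10, 64)
		to, e2 := strconv.ParseInt(parts[2], 10, 64)
		if e1 == nil && e2 == nil {
			fmt.Printf("field=%s from=%d to=%d\n", parts[0], from, to)
		}
	}
}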
|
||||
@ -264,20 +378,25 @@ func SetupRoutes(router *mux.Router, buildInfo web.Build) {
|
||||
infos := route.Setup(map[string]interface{}{}, r)
|
||||
if id, ok := infos["id"]; ok {
|
||||
title = strings.Replace(route.Title, "<ID>", id.(string), 1)
|
||||
if sid, ok := infos["sid"]; ok { // 2nd ID element
|
||||
title = strings.Replace(title, "<SID>", sid.(string), 1)
|
||||
}
|
||||
}
|
||||
|
||||
// Get User -> What if NIL?
|
||||
user := repository.GetUserFromContext(r.Context())
|
||||
|
||||
// Get Roles
|
||||
availableRoles, _ := schema.GetValidRolesMap(user)
|
||||
|
||||
page := web.Page{
|
||||
Title: title,
|
||||
User: *user,
|
||||
Roles: availableRoles,
|
||||
Build: buildInfo,
|
||||
Config: conf,
|
||||
Infos: infos,
|
||||
Title: title,
|
||||
User: *user,
|
||||
Roles: availableRoles,
|
||||
Build: buildInfo,
|
||||
Config: conf,
|
||||
Resampling: config.Keys.EnableResampling,
|
||||
Infos: infos,
|
||||
}
|
||||
|
||||
if route.Filter {
|
||||
@ -302,11 +421,19 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil
|
||||
case "jobId":
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
|
||||
case "jobName":
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
|
||||
// Add Last 30 Days to mitigate timeouts
|
||||
untilTime := strconv.FormatInt(time.Now().Unix(), 10)
|
||||
fromTime := strconv.FormatInt((time.Now().Unix() - int64(30*24*3600)), 10)
|
||||
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?startTime="+fromTime+"-"+untilTime+"&jobName="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
|
||||
case "projectId":
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
|
||||
case "arrayJobId":
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?arrayJobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
|
||||
// Add Last 30 Days to mitigate timeouts
|
||||
untilTime := strconv.FormatInt(time.Now().Unix(), 10)
|
||||
fromTime := strconv.FormatInt((time.Now().Unix() - int64(30*24*3600)), 10)
|
||||
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?startTime="+fromTime+"-"+untilTime+"&arrayJobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
|
||||
case "username":
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
|
||||
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound)
|
||||
@ -339,7 +466,11 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil
|
||||
} else if project != "" {
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(project), http.StatusFound) // projectId (equal)
|
||||
} else if jobname != "" {
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(jobname), http.StatusFound) // JobName (contains)
|
||||
// Add Last 30 Days to mitigate timeouts
|
||||
untilTime := strconv.FormatInt(time.Now().Unix(), 10)
|
||||
fromTime := strconv.FormatInt((time.Now().Unix() - int64(30*24*3600)), 10)
|
||||
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?startTime="+fromTime+"-"+untilTime+"&jobName="+url.QueryEscape(jobname), http.StatusFound) // 30D Fitler + JobName (contains)
|
||||
} else {
|
||||
web.RenderTemplate(rw, "message.tmpl", &web.Page{Title: "Info", MsgType: "alert-info", Message: "Search without result", User: *user, Roles: availableRoles, Build: buildInfo})
|
||||
}
|
||||
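The repeated 30-day window above boils down to two Unix timestamps appended as a startTime range. A minimal sketch (the job name is illustrative):

package main

import (
	"fmt"
	"strconv"
	"time"
)

func main() {
	untilTime := strconv.FormatInt(time.Now().Unix(), 10)
	fromTime := strconv.FormatInt(time.Now().Unix()-int64(30*24*3600), 10)
	fmt.Println("/monitoring/jobs/?startTime=" + fromTime + "-" + untilTime + "&jobName=lammps")
}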
|
41
internal/taskManager/compressionService.go
Normal file
41
internal/taskManager/compressionService.go
Normal file
@ -0,0 +1,41 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterCompressionService(compressOlderThan int) {
|
||||
log.Info("Register compression service")
|
||||
|
||||
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(05, 0, 0))),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
var jobs []*schema.Job
|
||||
var err error
|
||||
|
||||
ar := archive.GetHandle()
|
||||
startTime := time.Now().Unix() - int64(compressOlderThan*24*3600)
|
||||
lastTime := ar.CompressLast(startTime)
|
||||
if startTime == lastTime {
|
||||
log.Info("Compression Service - Complete archive run")
|
||||
jobs, err = jobRepo.FindJobsBetween(0, startTime)
|
||||
|
||||
} else {
|
||||
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for compression jobs: %v", err)
|
||||
}
|
||||
ar.Compress(jobs)
|
||||
}))
|
||||
}
|
36
internal/taskManager/ldapSyncService.go
Normal file
36
internal/taskManager/ldapSyncService.go
Normal file
@ -0,0 +1,36 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterLdapSyncService(ds string) {
|
||||
interval, err := parseDuration(ds)
|
||||
if err != nil {
|
||||
log.Warnf("Could not parse duration for sync interval: %v",
|
||||
ds)
|
||||
return
|
||||
}
|
||||
|
||||
auth := auth.GetAuthInstance()
|
||||
|
||||
log.Info("Register LDAP sync service")
|
||||
s.NewJob(gocron.DurationJob(interval),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
t := time.Now()
|
||||
log.Printf("ldap sync started at %s", t.Format(time.RFC3339))
|
||||
if err := auth.LdapAuth.Sync(); err != nil {
|
||||
log.Errorf("ldap sync failed: %s", err.Error())
|
||||
}
|
||||
log.Print("ldap sync done")
|
||||
}))
|
||||
}
|
67
internal/taskManager/retentionService.go
Normal file
67
internal/taskManager/retentionService.go
Normal file
@ -0,0 +1,67 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterRetentionDeleteService(age int, includeDB bool) {
|
||||
log.Info("Register retention delete service")
|
||||
|
||||
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(04, 0, 0))),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
startTime := time.Now().Unix() - int64(age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().CleanUp(jobs)
|
||||
|
||||
if includeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %s", err.Error())
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
func RegisterRetentionMoveService(age int, includeDB bool, location string) {
|
||||
log.Info("Register retention move service")
|
||||
|
||||
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(04, 0, 0))),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
startTime := time.Now().Unix() - int64(age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().Move(jobs, location)
|
||||
|
||||
if includeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %v", err)
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %v", err)
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
27
internal/taskManager/stopJobsExceedTime.go
Normal file
27
internal/taskManager/stopJobsExceedTime.go
Normal file
@ -0,0 +1,27 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterStopJobsExceedTime() {
|
||||
log.Info("Register undead jobs service")
|
||||
|
||||
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(03, 0, 0))),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
err := jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
|
||||
}
|
||||
runtime.GC()
|
||||
}))
|
||||
}
|
90
internal/taskManager/taskManager.go
Normal file
90
internal/taskManager/taskManager.go
Normal file
@ -0,0 +1,90 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
s gocron.Scheduler
|
||||
jobRepo *repository.JobRepository
|
||||
)
|
||||
|
||||
func parseDuration(s string) (time.Duration, error) {
|
||||
interval, err := time.ParseDuration(s)
|
||||
if err != nil {
|
||||
log.Warnf("Could not parse duration for sync interval: %v",
|
||||
s)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if interval == 0 {
|
||||
log.Info("TaskManager: Sync interval is zero")
|
||||
}
|
||||
|
||||
return interval, nil
|
||||
}
|
||||
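parseDuration accepts standard time.ParseDuration syntax; a zero value is logged but still returned, so callers decide what a disabled interval means. A quick sketch of the accepted formats:

package main

import (
	"fmt"
	"time"
)

func main() {
	for _, s := range []string{"24h", "90m", "0s", "oops"} {
		d, err := time.ParseDuration(s)
		fmt.Println(s, "->", d, err)
	}
	// 24h -> 24h0m0s <nil>; 90m -> 1h30m0s <nil>; 0s -> 0s <nil>;
	// oops -> 0s time: invalid duration "oops"
}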
|
||||
func Start() {
|
||||
var err error
|
||||
jobRepo = repository.GetJobRepository()
|
||||
s, err = gocron.NewScheduler()
|
||||
if err != nil {
|
||||
log.Abortf("Taskmanager Start: Could not create gocron scheduler.\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
if config.Keys.StopJobsExceedingWalltime > 0 {
|
||||
RegisterStopJobsExceedTime()
|
||||
}
|
||||
|
||||
var cfg struct {
|
||||
Retention schema.Retention `json:"retention"`
|
||||
Compression int `json:"compression"`
|
||||
}
|
||||
cfg.Retention.IncludeDB = true
|
||||
|
||||
if err := json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
}
|
||||
|
||||
switch cfg.Retention.Policy {
|
||||
case "delete":
|
||||
RegisterRetentionDeleteService(
|
||||
cfg.Retention.Age,
|
||||
cfg.Retention.IncludeDB)
|
||||
case "move":
|
||||
RegisterRetentionMoveService(
|
||||
cfg.Retention.Age,
|
||||
cfg.Retention.IncludeDB,
|
||||
cfg.Retention.Location)
|
||||
}
|
||||
|
||||
if cfg.Compression > 0 {
|
||||
RegisterCompressionService(cfg.Compression)
|
||||
}
|
||||
|
||||
lc := config.Keys.LdapConfig
|
||||
|
||||
if lc != nil && lc.SyncInterval != "" {
|
||||
RegisterLdapSyncService(lc.SyncInterval)
|
||||
}
|
||||
|
||||
RegisterFootprintWorker()
|
||||
RegisterUpdateDurationWorker()
|
||||
|
||||
s.Start()
|
||||
}
|
||||
|
||||
func Shutdown() {
|
||||
s.Shutdown()
|
||||
}
|
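All of the services above share one gocron/v2 pattern: a package-level scheduler, a job definition (daily or duration-based), and a task closure. A self-contained sketch of that pattern, assuming gocron/v2's documented API; the interval and printed message are illustrative:

package main

import (
	"fmt"
	"time"

	"github.com/go-co-op/gocron/v2"
)

func main() {
	s, err := gocron.NewScheduler()
	if err != nil {
		panic(err)
	}
	defer func() { _ = s.Shutdown() }()

	// Duration-based job, like the footprint and duration workers below.
	if _, err := s.NewJob(gocron.DurationJob(5*time.Minute),
		gocron.NewTask(func() {
			fmt.Println("periodic work at", time.Now().Format(time.RFC3339))
		})); err != nil {
		panic(err)
	}

	s.Start()
	time.Sleep(time.Second) // keep main alive briefly for the sketch
}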
33
internal/taskManager/updateDurationService.go
Normal file
33
internal/taskManager/updateDurationService.go
Normal file
@ -0,0 +1,33 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterUpdateDurationWorker() {
|
||||
var frequency string
|
||||
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.DurationWorker != "" {
|
||||
frequency = config.Keys.CronFrequency.DurationWorker
|
||||
} else {
|
||||
frequency = "5m"
|
||||
}
|
||||
d, _ := time.ParseDuration(frequency)
|
||||
log.Infof("Register Duration Update service with %s interval", frequency)
|
||||
|
||||
s.NewJob(gocron.DurationJob(d),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
start := time.Now()
|
||||
log.Printf("Update duration started at %s", start.Format(time.RFC3339))
|
||||
jobRepo.UpdateDuration()
|
||||
log.Printf("Update duration is done and took %s", time.Since(start))
|
||||
}))
|
||||
}
|
146
internal/taskManager/updateFootprintService.go
Normal file
146
internal/taskManager/updateFootprintService.go
Normal file
@ -0,0 +1,146 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterFootprintWorker() {
|
||||
var frequency string
|
||||
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.FootprintWorker != "" {
|
||||
frequency = config.Keys.CronFrequency.FootprintWorker
|
||||
} else {
|
||||
frequency = "10m"
|
||||
}
|
||||
d, _ := time.ParseDuration(frequency)
|
||||
log.Infof("Register Footprint Update service with %s interval", frequency)
|
||||
|
||||
s.NewJob(gocron.DurationJob(d),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
s := time.Now()
|
||||
c := 0
|
||||
ce := 0
|
||||
cl := 0
|
||||
log.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
|
||||
|
||||
for _, cluster := range archive.Clusters {
|
||||
s_cluster := time.Now()
|
||||
jobs, err := jobRepo.FindRunningJobs(cluster.Name)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
// NOTE: Additional Subcluster Loop Could Allow For Limited List Of Footprint-Metrics Only.
|
||||
// - Chunk-Size Would Then Be 'SubCluster' (Running Jobs, Transactions) as Lists Can Change Within SCs
|
||||
// - Would Require Review of 'updateFootprint' Usage (Logic Could Possibly Be Included Here Completely)
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(cluster.Name).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
|
||||
repo, err := metricdata.GetMetricDataRepo(cluster.Name)
|
||||
if err != nil {
|
||||
log.Errorf("no metric data repository configured for '%s'", cluster.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
pendingStatements := []sq.UpdateBuilder{}
|
||||
|
||||
for _, job := range jobs {
|
||||
log.Debugf("Prepare job %d", job.JobID)
|
||||
cl++
|
||||
|
||||
s_job := time.Now()
|
||||
|
||||
jobStats, err := repo.LoadStats(job, allMetrics, context.Background())
|
||||
if err != nil {
|
||||
log.Errorf("error wile loading job data stats for footprint update: %v", err)
|
||||
ce++
|
||||
continue
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
|
||||
for _, metric := range allMetrics {
|
||||
avg, min, max := 0.0, 0.0, 0.0
|
||||
data, ok := jobStats[metric] // JobStats[Metric1:[Hostname1:[Stats], Hostname2:[Stats], ...], Metric2[...] ...]
|
||||
if ok {
|
||||
for _, res := range job.Resources {
|
||||
hostStats, ok := data[res.Hostname]
|
||||
if ok {
|
||||
avg += hostStats.Avg
|
||||
min = math.Min(min, hostStats.Min)
|
||||
max = math.Max(max, hostStats.Max)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Add values rounded to 2 digits: repo.LoadStats may return unrounded
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: schema.Unit{
|
||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||
},
|
||||
Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
|
||||
Min: (math.Round(min*100) / 100),
|
||||
Max: (math.Round(max*100) / 100),
|
||||
}
|
||||
}
|
||||
|
||||
// Build Statement per Job, Add to Pending Array
|
||||
stmt := sq.Update("job")
|
||||
stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
|
||||
if err != nil {
|
||||
log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
|
||||
ce++
|
||||
continue
|
||||
}
|
||||
stmt = stmt.Where("job.id = ?", job.ID)
|
||||
|
||||
pendingStatements = append(pendingStatements, stmt)
|
||||
log.Debugf("Job %d took %s", job.JobID, time.Since(s_job))
|
||||
}
|
||||
|
||||
t, err := jobRepo.TransactionInit()
|
||||
if err != nil {
|
||||
log.Errorf("failed TransactionInit %v", err)
|
||||
log.Errorf("skipped %d transactions for cluster %s", len(pendingStatements), cluster.Name)
|
||||
ce += len(pendingStatements)
|
||||
} else {
|
||||
for _, ps := range pendingStatements {
|
||||
query, args, err := ps.ToSql()
|
||||
if err != nil {
|
||||
log.Errorf("failed in ToSQL conversion: %v", err)
|
||||
ce++
|
||||
} else {
|
||||
// args...: Footprint-JSON, Energyfootprint-JSON, TotalEnergy, JobID
|
||||
jobRepo.TransactionAdd(t, query, args...)
|
||||
c++
|
||||
}
|
||||
}
|
||||
jobRepo.TransactionEnd(t)
|
||||
}
|
||||
log.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
|
||||
}
|
||||
log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
|
||||
}))
|
||||
}
|
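The per-metric aggregation above sums host averages, divides by node count, and rounds to two decimals. An isolated sketch with made-up host statistics:

package main

import (
	"fmt"
	"math"
)

func main() {
	hostAvgs := []float64{41.237, 39.501, 40.844} // hypothetical per-host averages
	numNodes := float64(len(hostAvgs))

	sum := 0.0
	for _, a := range hostAvgs {
		sum += a
	}
	avg := math.Round(sum/numNodes*100) / 100 // rounded to 2 digits
	fmt.Println(avg)                          // 40.53
}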
@ -4,7 +4,13 @@
|
||||
// license that can be found in the LICENSE file.
|
||||
package util
|
||||
|
||||
import "golang.org/x/exp/constraints"
|
||||
import (
|
||||
"golang.org/x/exp/constraints"
|
||||
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
)
|
||||
|
||||
func Min[T constraints.Ordered](a, b T) T {
|
||||
if a < b {
|
||||
@ -19,3 +25,36 @@ func Max[T constraints.Ordered](a, b T) T {
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func sortedCopy(input []float64) []float64 {
|
||||
sorted := make([]float64, len(input))
|
||||
copy(sorted, input)
|
||||
sort.Float64s(sorted)
|
||||
return sorted
|
||||
}
|
||||
|
||||
func Mean(input []float64) (float64, error) {
|
||||
if len(input) == 0 {
|
||||
return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
|
||||
}
|
||||
sum := 0.0
|
||||
for _, n := range input {
|
||||
sum += n
|
||||
}
|
||||
return sum / float64(len(input)), nil
|
||||
}
|
||||
|
||||
func Median(input []float64) (median float64, err error) {
|
||||
c := sortedCopy(input)
|
||||
// Even numbers: add the two middle numbers, divide by two (use mean function)
|
||||
// Odd numbers: Use the middle number
|
||||
l := len(c)
|
||||
if l == 0 {
|
||||
return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
|
||||
} else if l%2 == 0 {
|
||||
median, _ = Mean(c[l/2-1 : l/2+1])
|
||||
} else {
|
||||
median = c[l/2]
|
||||
}
|
||||
return median, nil
|
||||
}
|
||||
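A short worked example for the two helpers: for an even-length input the median is the mean of the two middle values after sorting. A self-contained sketch applying the same even/odd rule:

package main

import (
	"fmt"
	"sort"
)

func main() {
	input := []float64{4, 1, 3, 2} // even length
	sorted := append([]float64(nil), input...)
	sort.Float64s(sorted) // [1 2 3 4]
	l := len(sorted)
	median := (sorted[l/2-1] + sorted[l/2]) / 2 // mean of 2 and 3
	fmt.Println(median)                         // 2.5
}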
|
@ -7,13 +7,14 @@ package archive
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
const Version uint64 = 1
|
||||
const Version uint64 = 2
|
||||
|
||||
type ArchiveBackend interface {
|
||||
Init(rawConfig json.RawMessage) (uint64, error)
|
||||
@ -26,6 +27,8 @@ type ArchiveBackend interface {
|
||||
|
||||
LoadJobData(job *schema.Job) (schema.JobData, error)
|
||||
|
||||
LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error)
|
||||
|
||||
LoadClusterCfg(name string) (*schema.Cluster, error)
|
||||
|
||||
StoreJobMeta(jobMeta *schema.JobMeta) error
|
||||
@ -53,47 +56,55 @@ type JobContainer struct {
|
||||
}
|
||||
|
||||
var (
|
||||
initOnce sync.Once
|
||||
cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||
ar ArchiveBackend
|
||||
useArchive bool
|
||||
)
|
||||
|
||||
func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||
useArchive = !disableArchive
|
||||
var err error
|
||||
|
||||
var cfg struct {
|
||||
Kind string `json:"kind"`
|
||||
}
|
||||
initOnce.Do(func() {
|
||||
useArchive = !disableArchive
|
||||
|
||||
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
return err
|
||||
}
|
||||
var cfg struct {
|
||||
Kind string `json:"kind"`
|
||||
}
|
||||
|
||||
switch cfg.Kind {
|
||||
case "file":
|
||||
ar = &FsArchive{}
|
||||
// case "s3":
|
||||
// ar = &S3Archive{}
|
||||
default:
|
||||
return fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
|
||||
}
|
||||
if err = json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
return
|
||||
}
|
||||
|
||||
version, err := ar.Init(rawConfig)
|
||||
if err != nil {
|
||||
log.Error("Error while initializing archiveBackend")
|
||||
return err
|
||||
}
|
||||
log.Infof("Load archive version %d", version)
|
||||
switch cfg.Kind {
|
||||
case "file":
|
||||
ar = &FsArchive{}
|
||||
// case "s3":
|
||||
// ar = &S3Archive{}
|
||||
default:
|
||||
err = fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
|
||||
}
|
||||
|
||||
return initClusterConfig()
|
||||
var version uint64
|
||||
version, err = ar.Init(rawConfig)
|
||||
if err != nil {
|
||||
log.Errorf("Error while initializing archiveBackend: %s", err.Error())
|
||||
return
|
||||
}
|
||||
log.Infof("Load archive version %d", version)
|
||||
|
||||
err = initClusterConfig()
|
||||
})
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func GetHandle() ArchiveBackend {
|
||||
return ar
|
||||
}
|
||||
|
||||
// Helper to metricdata.LoadAverages().
|
||||
// Helper to metricdataloader.LoadAverages().
|
||||
func LoadAveragesFromArchive(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
@ -101,7 +112,7 @@ func LoadAveragesFromArchive(
|
||||
) error {
|
||||
metaFile, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job metadata from archiveBackend")
|
||||
log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
@ -116,10 +127,55 @@ func LoadAveragesFromArchive(
|
||||
return nil
|
||||
}
|
||||
|
||||
// Helper to metricdataloader.LoadJobStats().
|
||||
func LoadStatsFromArchive(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
) (map[string]schema.MetricStatistics, error) {
|
||||
data := make(map[string]schema.MetricStatistics, len(metrics))
|
||||
metaFile, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
|
||||
return data, err
|
||||
}
|
||||
|
||||
for _, m := range metrics {
|
||||
stat, ok := metaFile.Statistics[m]
|
||||
if !ok {
|
||||
data[m] = schema.MetricStatistics{Min: 0.0, Avg: 0.0, Max: 0.0}
|
||||
continue
|
||||
}
|
||||
|
||||
data[m] = schema.MetricStatistics{
|
||||
Avg: stat.Avg,
|
||||
Min: stat.Min,
|
||||
Max: stat.Max,
|
||||
}
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Helper to metricdataloader.LoadScopedJobStats().
|
||||
func LoadScopedStatsFromArchive(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
) (schema.ScopedJobStats, error) {
|
||||
|
||||
data, err := ar.LoadJobStats(job)
|
||||
if err != nil {
|
||||
log.Errorf("Error while loading job stats from archiveBackend: %s", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
||||
metaFile, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job metadata from archiveBackend")
|
||||
log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -135,7 +191,7 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
|
||||
|
||||
jobMeta, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job metadata from archiveBackend")
|
||||
log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
@ -155,15 +211,16 @@ func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
||||
|
||||
jobMeta, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job metadata from archiveBackend")
|
||||
log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
jobMeta.Tags = make([]*schema.Tag, 0)
|
||||
for _, tag := range tags {
|
||||
jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{
|
||||
Name: tag.Name,
|
||||
Type: tag.Type,
|
||||
Name: tag.Name,
|
||||
Type: tag.Type,
|
||||
Scope: tag.Scope,
|
||||
})
|
||||
}
|
||||
|
||||
|
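The reworked Init above funnels all setup through sync.Once, so the error has to be captured by a variable declared outside the closure, with bare `return` statements inside it. A stripped-down sketch of just that pattern (illustrative names, not part of the package):

    package main

    import (
    	"errors"
    	"fmt"
    	"sync"
    )

    var setupOnce sync.Once

    func setup(fail bool) error {
    	var err error
    	setupOnce.Do(func() {
    		if fail {
    			err = errors.New("backend init failed")
    			return // a bare return exits the closure; err carries the result out
    		}
    		// ... expensive one-time initialization ...
    	})
    	// On every call after the first, Do skips the closure and err stays nil.
    	return err
    }

    func main() {
    	fmt.Println(setup(true))  // backend init failed
    	fmt.Println(setup(false)) // <nil>: the body never runs again
    }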
@ -12,13 +12,16 @@ import (
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

var Clusters []*schema.Cluster
var nodeLists map[string]map[string]NodeList
var (
	Clusters         []*schema.Cluster
	GlobalMetricList []*schema.GlobalMetricListItem
	NodeLists        map[string]map[string]NodeList
)

func initClusterConfig() error {

	Clusters = []*schema.Cluster{}
	nodeLists = map[string]map[string]NodeList{}
	NodeLists = map[string]map[string]NodeList{}
	metricLookup := make(map[string]schema.GlobalMetricListItem)

	for _, c := range ar.GetClusters() {

@ -49,11 +52,79 @@ func initClusterConfig() error {
			if !mc.Scope.Valid() {
				return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'socket', ...)")
			}

			ml, ok := metricLookup[mc.Name]
			if !ok {
				metricLookup[mc.Name] = schema.GlobalMetricListItem{
					Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint,
				}
				ml = metricLookup[mc.Name]
			}
			availability := schema.ClusterSupport{Cluster: cluster.Name}
			scLookup := make(map[string]*schema.SubClusterConfig)

			for _, scc := range mc.SubClusters {
				scLookup[scc.Name] = scc
			}

			for _, sc := range cluster.SubClusters {
				newMetric := &schema.MetricConfig{
					Unit:          mc.Unit,
					Energy:        mc.Energy,
					Name:          mc.Name,
					Scope:         mc.Scope,
					Aggregation:   mc.Aggregation,
					Peak:          mc.Peak,
					Caution:       mc.Caution,
					Alert:         mc.Alert,
					Timestep:      mc.Timestep,
					Normal:        mc.Normal,
					LowerIsBetter: mc.LowerIsBetter,
				}

				if mc.Footprint != "" {
					newMetric.Footprint = mc.Footprint
				}

				if cfg, ok := scLookup[sc.Name]; ok {
					if !cfg.Remove {
						availability.SubClusters = append(availability.SubClusters, sc.Name)
						newMetric.Peak = cfg.Peak
						newMetric.Normal = cfg.Normal
						newMetric.Caution = cfg.Caution
						newMetric.Alert = cfg.Alert
						newMetric.Footprint = cfg.Footprint
						newMetric.Energy = cfg.Energy
						newMetric.LowerIsBetter = cfg.LowerIsBetter
						sc.MetricConfig = append(sc.MetricConfig, *newMetric)

						if newMetric.Footprint != "" {
							sc.Footprint = append(sc.Footprint, newMetric.Name)
							ml.Footprint = newMetric.Footprint
						}
						if newMetric.Energy != "" {
							sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
						}
					}
				} else {
					availability.SubClusters = append(availability.SubClusters, sc.Name)
					sc.MetricConfig = append(sc.MetricConfig, *newMetric)

					if newMetric.Footprint != "" {
						sc.Footprint = append(sc.Footprint, newMetric.Name)
					}
					if newMetric.Energy != "" {
						sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
					}
				}
			}
			ml.Availability = append(metricLookup[mc.Name].Availability, availability)
			metricLookup[mc.Name] = ml
		}

		Clusters = append(Clusters, cluster)

		nodeLists[cluster.Name] = make(map[string]NodeList)
		NodeLists[cluster.Name] = make(map[string]NodeList)
		for _, sc := range cluster.SubClusters {
			if sc.Nodes == "*" {
				continue
@ -63,15 +134,18 @@ func initClusterConfig() error {
			if err != nil {
				return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
			}
			nodeLists[cluster.Name][sc.Name] = nl
			NodeLists[cluster.Name][sc.Name] = nl
		}
	}

	for _, ml := range metricLookup {
		GlobalMetricList = append(GlobalMetricList, &ml)
	}

	return nil
}

func GetCluster(cluster string) *schema.Cluster {

	for _, c := range Clusters {
		if c.Name == cluster {
			return c
@ -90,11 +164,10 @@ func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
			}
		}
	}
	return nil, fmt.Errorf("Subcluster '%v' not found for cluster '%v', or cluster '%v' not configured!", subcluster, cluster, cluster)
	return nil, fmt.Errorf("subcluster '%v' not found for cluster '%v', or cluster '%v' not configured", subcluster, cluster, cluster)
}

func GetMetricConfig(cluster, metric string) *schema.MetricConfig {

	for _, c := range Clusters {
		if c.Name == cluster {
			for _, m := range c.MetricConfig {
@ -110,7 +183,6 @@ func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
// AssignSubCluster sets the `job.subcluster` property of the job based
// on its cluster and resources.
func AssignSubCluster(job *schema.BaseJob) error {

	cluster := GetCluster(job.Cluster)
	if cluster == nil {
		return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unknown cluster: %v", job.Cluster)
@ -130,7 +202,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
	}

	host0 := job.Resources[0].Hostname
	for sc, nl := range nodeLists[job.Cluster] {
	for sc, nl := range NodeLists[job.Cluster] {
		if nl != nil && nl.Contains(host0) {
			job.SubCluster = sc
			return nil
@ -146,8 +218,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
}

func GetSubClusterByNode(cluster, hostname string) (string, error) {

	for sc, nl := range nodeLists[cluster] {
	for sc, nl := range NodeLists[cluster] {
		if nl != nil && nl.Contains(hostname) {
			return sc, nil
		}
@ -164,3 +235,13 @@ func GetSubClusterByNode(cluster, hostname string) (string, error) {

	return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", cluster, hostname)
}

func MetricIndex(mc []schema.MetricConfig, name string) (int, error) {
	for i, m := range mc {
		if m.Name == name {
			return i, nil
		}
	}

	return 0, fmt.Errorf("unknown metric name %s", name)
}
39	pkg/archive/clusterConfig_test.go	Normal file
@ -0,0 +1,39 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive_test

import (
	"encoding/json"
	"testing"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
)

func TestClusterConfig(t *testing.T) {
	if err := archive.Init(json.RawMessage("{\"kind\": \"file\",\"path\": \"testdata/archive\"}"), false); err != nil {
		t.Fatal(err)
	}

	sc, err := archive.GetSubCluster("fritz", "spr1tb")
	if err != nil {
		t.Fatal(err)
	}
	// spew.Dump(sc.MetricConfig)
	if len(sc.Footprint) != 3 {
		t.Fail()
	}
	if len(sc.MetricConfig) != 15 {
		t.Fail()
	}

	for _, metric := range sc.MetricConfig {
		if metric.LowerIsBetter && metric.Name != "mem_used" {
			t.Fail()
		}
	}

	// spew.Dump(archive.GlobalMetricList)
	// t.Fail()
}
@ -115,6 +115,40 @@ func loadJobData(filename string, isCompressed bool) (schema.JobData, error) {
	}
}

func loadJobStats(filename string, isCompressed bool) (schema.ScopedJobStats, error) {
	f, err := os.Open(filename)

	if err != nil {
		log.Errorf("fsBackend LoadJobStats()- %v", err)
		return nil, err
	}
	defer f.Close()

	if isCompressed {
		r, err := gzip.NewReader(f)
		if err != nil {
			log.Errorf(" %v", err)
			return nil, err
		}
		defer r.Close()

		if config.Keys.Validate {
			if err := schema.Validate(schema.Data, r); err != nil {
				return nil, fmt.Errorf("validate job data: %v", err)
			}
		}

		return DecodeJobStats(r, filename)
	} else {
		if config.Keys.Validate {
			if err := schema.Validate(schema.Data, bufio.NewReader(f)); err != nil {
				return nil, fmt.Errorf("validate job data: %v", err)
			}
		}
		return DecodeJobStats(bufio.NewReader(f), filename)
	}
}

func (fsa *FsArchive) Init(rawConfig json.RawMessage) (uint64, error) {

	var config FsArchiveConfig
@ -389,6 +423,18 @@ func (fsa *FsArchive) LoadJobData(job *schema.Job) (schema.JobData, error) {
	return loadJobData(filename, isCompressed)
}

func (fsa *FsArchive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) {
	var isCompressed bool = true
	filename := getPath(job, fsa.path, "data.json.gz")

	if !util.CheckFileExists(filename) {
		filename = getPath(job, fsa.path, "data.json")
		isCompressed = false
	}

	return loadJobStats(filename, isCompressed)
}

func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.JobMeta, error) {
	filename := getPath(job, fsa.path, "meta.json")
	return loadJobMeta(filename)
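LoadJobStats above probes for the compressed file first and falls back to the plain JSON file. The same probe-then-fallback idea using only the standard library (a sketch, not the repository's util.CheckFileExists helper; requires "os" and "path/filepath" imports):

    // pickDataFile prefers "data.json.gz" and falls back to "data.json".
    func pickDataFile(dir string) (filename string, isCompressed bool) {
    	filename = filepath.Join(dir, "data.json.gz")
    	if _, err := os.Stat(filename); err == nil {
    		return filename, true
    	}
    	return filepath.Join(dir, "data.json"), false
    }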
@ -30,6 +30,7 @@ func TestInitNoJson(t *testing.T) {
		t.Fatal(err)
	}
}

func TestInitNotExists(t *testing.T) {
	var fsa FsArchive
	_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/job-archive\"}"))
@ -47,10 +48,10 @@ func TestInit(t *testing.T) {
	if fsa.path != "testdata/archive" {
		t.Fail()
	}
	if version != 1 {
	if version != 2 {
		t.Fail()
	}
	if len(fsa.clusters) != 1 || fsa.clusters[0] != "emmy" {
	if len(fsa.clusters) != 3 || fsa.clusters[1] != "emmy" {
		t.Fail()
	}
}
@ -133,7 +134,6 @@ func TestLoadJobData(t *testing.T) {
}

func BenchmarkLoadJobData(b *testing.B) {

	tmpdir := b.TempDir()
	jobarchive := filepath.Join(tmpdir, "job-archive")
	util.CopyDir("./testdata/archive/", jobarchive)
@ -157,7 +157,6 @@ func BenchmarkLoadJobData(b *testing.B) {
}

func BenchmarkLoadJobDataCompressed(b *testing.B) {

	tmpdir := b.TempDir()
	jobarchive := filepath.Join(tmpdir, "job-archive")
	util.CopyDir("./testdata/archive/", jobarchive)
@ -9,8 +9,8 @@ import (
	"io"
	"time"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
@ -32,6 +32,43 @@ func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
	return data.(schema.JobData), nil
}

func DecodeJobStats(r io.Reader, k string) (schema.ScopedJobStats, error) {
	jobData, err := DecodeJobData(r, k)
	// Convert schema.JobData to schema.ScopedJobStats
	if jobData != nil {
		scopedJobStats := make(schema.ScopedJobStats)
		for metric, metricData := range jobData {
			if _, ok := scopedJobStats[metric]; !ok {
				scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
			}

			for scope, jobMetric := range metricData {
				if _, ok := scopedJobStats[metric][scope]; !ok {
					scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
				}

				for _, series := range jobMetric.Series {
					scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
						Hostname: series.Hostname,
						Id:       series.Id,
						Data:     &series.Statistics,
					})
				}

				// So that one can later check len(scopedJobStats[metric][scope]): Remove from map if empty
				if len(scopedJobStats[metric][scope]) == 0 {
					delete(scopedJobStats[metric], scope)
					if len(scopedJobStats[metric]) == 0 {
						delete(scopedJobStats, metric)
					}
				}
			}
		}
		return scopedJobStats, nil
	}
	return nil, err
}

func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
	var d schema.JobMeta
	if err := json.NewDecoder(r).Decode(&d); err != nil {
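DecodeJobStats above discards the raw time series and keeps only per-host statistics. A hedged sketch of how a consumer might walk the resulting schema.ScopedJobStats map (metric and scope keys are whatever the job archive contains):

    func printScopedStats(stats schema.ScopedJobStats) {
    	// stats: map[metric]map[scope][]*schema.ScopedStats
    	for metric, scopes := range stats {
    		for scope, hosts := range scopes {
    			for _, h := range hosts {
    				// h.Data is a *schema.MetricStatistics (Min/Avg/Max).
    				fmt.Printf("%s/%s on %s: min=%.2f avg=%.2f max=%.2f\n",
    					metric, scope, h.Hostname, h.Data.Min, h.Data.Avg, h.Data.Max)
    			}
    		}
    	}
    }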
2772	pkg/archive/testdata/archive/alex/cluster.json	vendored	Normal file
File diff suppressed because it is too large
2246	pkg/archive/testdata/archive/fritz/cluster.json	vendored	Normal file
File diff suppressed because it is too large
2	pkg/archive/testdata/archive/version.txt	vendored
@ -1 +1 @@
1
2
236	pkg/log/log.go
@ -46,12 +46,12 @@ var loglevel string = "info"
/* CONFIG */

func Init(lvl string, logdate bool) {

	// Discard I/O for all writers below selected loglevel; <CRITICAL> is always written.
	switch lvl {
	case "crit":
		ErrWriter = io.Discard
		fallthrough
	case "err", "fatal":
	case "err":
		WarnWriter = io.Discard
		fallthrough
	case "warn":
@ -63,8 +63,7 @@ func Init(lvl string, logdate bool) {
		// Nothing to do...
		break
	default:
		fmt.Printf("pkg/log: Flag 'loglevel' has invalid value %#v\npkg/log: Will use default loglevel 'debug'\n", lvl)
		//SetLogLevel("debug")
		fmt.Printf("pkg/log: Flag 'loglevel' has invalid value %#v\npkg/log: Will use default loglevel '%s'\n", lvl, loglevel)
	}

	if !logdate {
@ -84,109 +83,138 @@ func Init(lvl string, logdate bool) {
	loglevel = lvl
}

/* PRINT */

// Private helper
func printStr(v ...interface{}) string {
	return fmt.Sprint(v...)
}

// Uses Info() -> If errorpath required at some point:
// Will need own writer with 'Output(2, out)' to correctly render path
func Print(v ...interface{}) {
	Info(v...)
}

func Debug(v ...interface{}) {
	DebugLog.Output(2, printStr(v...))
}

func Info(v ...interface{}) {
	InfoLog.Output(2, printStr(v...))
}

func Warn(v ...interface{}) {
	WarnLog.Output(2, printStr(v...))
}

func Error(v ...interface{}) {
	ErrLog.Output(2, printStr(v...))
}

// Writes panic stacktrace, but keeps application alive
func Panic(v ...interface{}) {
	ErrLog.Output(2, printStr(v...))
	panic("Panic triggered ...")
}

func Crit(v ...interface{}) {
	CritLog.Output(2, printStr(v...))
}

// Writes critical log, stops application
func Fatal(v ...interface{}) {
	CritLog.Output(2, printStr(v...))
	os.Exit(1)
}

/* PRINT FORMAT */

// Private helper
func printfStr(format string, v ...interface{}) string {
	return fmt.Sprintf(format, v...)
}

// Uses Infof() -> If errorpath required at some point:
// Will need own writer with 'Output(2, out)' to correctly render path
func Printf(format string, v ...interface{}) {
	Infof(format, v...)
}

func Debugf(format string, v ...interface{}) {
	DebugLog.Output(2, printfStr(format, v...))
}

func Infof(format string, v ...interface{}) {
	InfoLog.Output(2, printfStr(format, v...))
}

func Warnf(format string, v ...interface{}) {
	WarnLog.Output(2, printfStr(format, v...))
}

func Errorf(format string, v ...interface{}) {
	ErrLog.Output(2, printfStr(format, v...))
}

// Writes panic stacktrace, but keeps application alive
func Panicf(format string, v ...interface{}) {
	ErrLog.Output(2, printfStr(format, v...))
	panic("Panic triggered ...")
}

func Critf(format string, v ...interface{}) {
	CritLog.Output(2, printfStr(format, v...))
}

// Writes crit log, stops application
func Fatalf(format string, v ...interface{}) {
	CritLog.Output(2, printfStr(format, v...))
	os.Exit(1)
}
/* HELPER */

func Loglevel() string {
	return loglevel
}

/* SPECIAL */
/* PRIVATE HELPER */

// func Finfof(w io.Writer, format string, v ...interface{}) {
// 	if w != io.Discard {
// 		if logDateTime {
// 			currentTime := time.Now()
// 			fmt.Fprintf(InfoWriter, currentTime.String()+InfoPrefix+format+"\n", v...)
// 		} else {
// 			fmt.Fprintf(InfoWriter, InfoPrefix+format+"\n", v...)
// 		}
// 	}
// }
// Return unformatted string
func printStr(v ...interface{}) string {
	return fmt.Sprint(v...)
}

// Return formatted string
func printfStr(format string, v ...interface{}) string {
	return fmt.Sprintf(format, v...)
}

/* PRINT */

// Prints to STDOUT without string formatting; application continues.
// Used for special cases not requiring log information like date or location.
func Print(v ...interface{}) {
	fmt.Fprintln(os.Stdout, v...)
}

// Prints to STDOUT without string formatting; application exits with error code 0.
// Used for exiting successfully with message after expected outcome, e.g. successful single-call application runs.
func Exit(v ...interface{}) {
	fmt.Fprintln(os.Stdout, v...)
	os.Exit(0)
}

// Prints to STDOUT without string formatting; application exits with error code 1.
// Used for terminating with message after expected errors, e.g. wrong arguments or during init().
func Abort(v ...interface{}) {
	fmt.Fprintln(os.Stdout, v...)
	os.Exit(1)
}

// Prints to DEBUG writer without string formatting; application continues.
// Used for logging additional information, primarily for development.
func Debug(v ...interface{}) {
	DebugLog.Output(2, printStr(v...))
}

// Prints to INFO writer without string formatting; application continues.
// Used for logging additional information, e.g. notable returns or common fail-cases.
func Info(v ...interface{}) {
	InfoLog.Output(2, printStr(v...))
}

// Prints to WARNING writer without string formatting; application continues.
// Used for logging important information, e.g. uncommon edge-cases or administration related information.
func Warn(v ...interface{}) {
	WarnLog.Output(2, printStr(v...))
}

// Prints to ERROR writer without string formatting; application continues.
// Used for logging errors, but code still can return default(s) or nil.
func Error(v ...interface{}) {
	ErrLog.Output(2, printStr(v...))
}

// Prints to CRITICAL writer without string formatting; application exits with error code 1.
// Used for terminating on unexpected errors with date and code location.
func Fatal(v ...interface{}) {
	CritLog.Output(2, printStr(v...))
	os.Exit(1)
}

// Prints to PANIC function without string formatting; application exits with panic.
// Used for terminating on unexpected errors with stacktrace.
func Panic(v ...interface{}) {
	panic(printStr(v...))
}

/* PRINT FORMAT */

// Prints to STDOUT with string formatting; application continues.
// Used for special cases not requiring log information like date or location.
func Printf(format string, v ...interface{}) {
	fmt.Fprintf(os.Stdout, format, v...)
}

// Prints to STDOUT with string formatting; application exits with error code 0.
// Used for exiting successfully with message after expected outcome, e.g. successful single-call application runs.
func Exitf(format string, v ...interface{}) {
	fmt.Fprintf(os.Stdout, format, v...)
	os.Exit(0)
}

// Prints to STDOUT with string formatting; application exits with error code 1.
// Used for terminating with message after expected errors, e.g. wrong arguments or during init().
func Abortf(format string, v ...interface{}) {
	fmt.Fprintf(os.Stdout, format, v...)
	os.Exit(1)
}

// Prints to DEBUG writer with string formatting; application continues.
// Used for logging additional information, primarily for development.
func Debugf(format string, v ...interface{}) {
	DebugLog.Output(2, printfStr(format, v...))
}

// Prints to INFO writer with string formatting; application continues.
// Used for logging additional information, e.g. notable returns or common fail-cases.
func Infof(format string, v ...interface{}) {
	InfoLog.Output(2, printfStr(format, v...))
}

// Prints to WARNING writer with string formatting; application continues.
// Used for logging important information, e.g. uncommon edge-cases or administration related information.
func Warnf(format string, v ...interface{}) {
	WarnLog.Output(2, printfStr(format, v...))
}

// Prints to ERROR writer with string formatting; application continues.
// Used for logging errors, but code still can return default(s) or nil.
func Errorf(format string, v ...interface{}) {
	ErrLog.Output(2, printfStr(format, v...))
}

// Prints to CRITICAL writer with string formatting; application exits with error code 1.
// Used for terminating on unexpected errors with date and code location.
func Fatalf(format string, v ...interface{}) {
	CritLog.Output(2, printfStr(format, v...))
	os.Exit(1)
}

// Prints to PANIC function with string formatting; application exits with panic.
// Used for terminating on unexpected errors with stacktrace.
func Panicf(format string, v ...interface{}) {
	panic(printfStr(format, v...))
}
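The reworked API separates bare stdout printers (Print, Exit, Abort) from the leveled writers. A short usage sketch (messages invented):

    package main

    import "github.com/ClusterCockpit/cc-backend/pkg/log"

    func main() {
    	log.Init("info", true) // loglevel "info", prefix log lines with the date

    	log.Print("plain status line, no level prefix") // straight to stdout
    	log.Infof("loaded archive version %d", 2)       // INFO writer, formatted
    	log.Warn("config key missing, using default")   // WARN writer

    	// Terminating helpers, shown here commented out:
    	// log.Abort("wrong arguments")                // stdout, then os.Exit(1)
    	// log.Fatalf("cannot open %s", "config.json") // CRITICAL writer, then os.Exit(1)
    }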
123	pkg/resampler/resampler.go	Normal file
@ -0,0 +1,123 @@
package resampler

import (
	"errors"
	"fmt"
	"math"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func SimpleResampler(data []schema.Float, old_frequency int64, new_frequency int64) ([]schema.Float, int64, error) {
	if old_frequency == 0 || new_frequency == 0 || new_frequency <= old_frequency {
		return data, old_frequency, nil
	}

	if new_frequency%old_frequency != 0 {
		return nil, 0, errors.New("new sampling frequency should be multiple of the old frequency")
	}

	var step int = int(new_frequency / old_frequency)
	var new_data_length = len(data) / step

	if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) {
		return data, old_frequency, nil
	}

	new_data := make([]schema.Float, new_data_length)

	for i := 0; i < new_data_length; i++ {
		new_data[i] = data[i*step]
	}

	return new_data, new_frequency, nil
}

// Inspired by one of the algorithms from https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf
// Adapted from https://github.com/haoel/downsampling/blob/master/core/lttb.go
func LargestTriangleThreeBucket(data []schema.Float, old_frequency int, new_frequency int) ([]schema.Float, int, error) {

	if old_frequency == 0 || new_frequency == 0 || new_frequency <= old_frequency {
		return data, old_frequency, nil
	}

	if new_frequency%old_frequency != 0 {
		return nil, 0, fmt.Errorf("new sampling frequency : %d should be multiple of the old frequency : %d", new_frequency, old_frequency)
	}

	var step int = int(new_frequency / old_frequency)
	var new_data_length = len(data) / step

	if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) {
		return data, old_frequency, nil
	}

	new_data := make([]schema.Float, 0, new_data_length)

	// Bucket size. Leave room for start and end data points
	bucketSize := float64(len(data)-2) / float64(new_data_length-2)

	new_data = append(new_data, data[0]) // Always add the first point

	// We have 3 pointers representing:
	// > bucketLow - the current bucket's beginning location
	// > bucketMiddle - the current bucket's ending location,
	//   also the beginning location of next bucket
	// > bucketHigh - the next bucket's ending location.
	bucketLow := 1
	bucketMiddle := int(math.Floor(bucketSize)) + 1

	var prevMaxAreaPoint int

	for i := 0; i < new_data_length-2; i++ {

		bucketHigh := int(math.Floor(float64(i+2)*bucketSize)) + 1
		if bucketHigh >= len(data)-1 {
			bucketHigh = len(data) - 2
		}

		// Calculate point average for next bucket (containing c)
		avgPointX, avgPointY := calculateAverageDataPoint(data[bucketMiddle:bucketHigh+1], int64(bucketMiddle))

		// Get the range for current bucket
		currBucketStart := bucketLow
		currBucketEnd := bucketMiddle

		// Point a
		pointX := prevMaxAreaPoint
		pointY := data[prevMaxAreaPoint]

		maxArea := -1.0

		var maxAreaPoint int
		flag_ := 0
		for ; currBucketStart < currBucketEnd; currBucketStart++ {

			area := calculateTriangleArea(schema.Float(pointX), pointY, avgPointX, avgPointY, schema.Float(currBucketStart), data[currBucketStart])
			if area > maxArea {
				maxArea = area
				maxAreaPoint = currBucketStart
			}
			if math.IsNaN(float64(avgPointY)) {
				flag_ = 1
			}
		}

		if flag_ == 1 {
			new_data = append(new_data, schema.NaN) // Pick this point from the bucket
		} else {
			new_data = append(new_data, data[maxAreaPoint]) // Pick this point from the bucket
		}
		prevMaxAreaPoint = maxAreaPoint // This MaxArea point is the next's prevMaxAreaPoint

		// Move to the next window
		bucketLow = bucketMiddle
		bucketMiddle = bucketHigh
	}

	new_data = append(new_data, data[len(data)-1]) // Always add last

	return new_data, new_frequency, nil
}
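Both resamplers return the input unchanged unless the new frequency is a coarser integer multiple of the old one and the series has at least 100 points. A usage sketch with synthetic data (values invented):

    package main

    import (
    	"fmt"

    	"github.com/ClusterCockpit/cc-backend/pkg/resampler"
    	"github.com/ClusterCockpit/cc-backend/pkg/schema"
    )

    func main() {
    	// 600 synthetic points sampled every 60 seconds.
    	data := make([]schema.Float, 600)
    	for i := range data {
    		data[i] = schema.Float(i % 50)
    	}

    	// Downsample 60s -> 300s; step is 5, so 600/5 = 120 points remain.
    	out, freq, err := resampler.LargestTriangleThreeBucket(data, 60, 300)
    	if err != nil {
    		fmt.Println(err)
    		return
    	}
    	fmt.Printf("%d points at %ds resolution\n", len(out), freq) // 120 points at 300s
    }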
35	pkg/resampler/util.go	Normal file
@ -0,0 +1,35 @@
package resampler

import (
	"math"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func calculateTriangleArea(paX, paY, pbX, pbY, pcX, pcY schema.Float) float64 {
	area := ((paX-pcX)*(pbY-paY) - (paX-pbX)*(pcY-paY)) * 0.5
	return math.Abs(float64(area))
}

func calculateAverageDataPoint(points []schema.Float, xStart int64) (avgX schema.Float, avgY schema.Float) {
	flag := 0
	for _, point := range points {
		avgX += schema.Float(xStart)
		avgY += point
		xStart++
		if math.IsNaN(float64(point)) {
			flag = 1
		}
	}

	l := schema.Float(len(points))

	avgX /= l
	avgY /= l

	if flag == 1 {
		return avgX, schema.NaN
	} else {
		return avgX, avgY
	}
}
@ -5,85 +5,16 @@
package runtimeEnv

import (
	"bufio"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"os/user"
	"strconv"
	"strings"
	"syscall"

	"github.com/ClusterCockpit/cc-backend/pkg/log"
)

// Very simple and limited .env file reader.
// All variable definitions found are directly
// added to the process's environment.
func LoadEnv(file string) error {
	f, err := os.Open(file)
	if err != nil {
		log.Error("Error while opening .env file")
		return err
	}

	defer f.Close()
	s := bufio.NewScanner(bufio.NewReader(f))
	for s.Scan() {
		line := s.Text()
		if strings.HasPrefix(line, "#") || len(line) == 0 {
			continue
		}

		if strings.Contains(line, "#") {
			return errors.New("'#' is only supported at the start of a line")
		}

		line = strings.TrimPrefix(line, "export ")
		parts := strings.SplitN(line, "=", 2)
		if len(parts) != 2 {
			return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
		}

		key := strings.TrimSpace(parts[0])
		val := strings.TrimSpace(parts[1])
		if strings.HasPrefix(val, "\"") {
			if !strings.HasSuffix(val, "\"") {
				return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
			}

			runes := []rune(val[1 : len(val)-1])
			sb := strings.Builder{}
			for i := 0; i < len(runes); i++ {
				if runes[i] == '\\' {
					i++
					switch runes[i] {
					case 'n':
						sb.WriteRune('\n')
					case 'r':
						sb.WriteRune('\r')
					case 't':
						sb.WriteRune('\t')
					case '"':
						sb.WriteRune('"')
					default:
						return fmt.Errorf("RUNTIME/SETUP > unsupported escape sequence in quoted string: backslash %#v", runes[i])
					}
					continue
				}
				sb.WriteRune(runes[i])
			}

			val = sb.String()
		}

		os.Setenv(key, val)
	}

	return s.Err()
}

// Changes the process's user and group to those
// specified in the config.json. The go runtime
// takes care of all threads (and not only the calling one)
|
||||
}
|
||||
|
||||
type SubCluster struct {
|
||||
Name string `json:"name"`
|
||||
Nodes string `json:"nodes"`
|
||||
ProcessorType string `json:"processorType"`
|
||||
SocketsPerNode int `json:"socketsPerNode"`
|
||||
CoresPerSocket int `json:"coresPerSocket"`
|
||||
ThreadsPerCore int `json:"threadsPerCore"`
|
||||
FlopRateScalar MetricValue `json:"flopRateScalar"`
|
||||
FlopRateSimd MetricValue `json:"flopRateSimd"`
|
||||
MemoryBandwidth MetricValue `json:"memoryBandwidth"`
|
||||
Topology Topology `json:"topology"`
|
||||
Name string `json:"name"`
|
||||
Nodes string `json:"nodes"`
|
||||
ProcessorType string `json:"processorType"`
|
||||
Topology Topology `json:"topology"`
|
||||
FlopRateScalar MetricValue `json:"flopRateScalar"`
|
||||
FlopRateSimd MetricValue `json:"flopRateSimd"`
|
||||
MemoryBandwidth MetricValue `json:"memoryBandwidth"`
|
||||
MetricConfig []MetricConfig `json:"metricConfig,omitempty"`
|
||||
Footprint []string `json:"footprint,omitempty"`
|
||||
EnergyFootprint []string `json:"energyFootprint,omitempty"`
|
||||
SocketsPerNode int `json:"socketsPerNode"`
|
||||
CoresPerSocket int `json:"coresPerSocket"`
|
||||
ThreadsPerCore int `json:"threadsPerCore"`
|
||||
}
|
||||
|
||||
type SubClusterConfig struct {
|
||||
Name string `json:"name"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
Remove bool `json:"remove"`
|
||||
Name string `json:"name"`
|
||||
Footprint string `json:"footprint,omitempty"`
|
||||
Energy string `json:"energy"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
Remove bool `json:"remove"`
|
||||
LowerIsBetter bool `json:"lowerIsBetter"`
|
||||
}
|
||||
|
||||
type MetricConfig struct {
|
||||
Name string `json:"name"`
|
||||
Unit Unit `json:"unit"`
|
||||
Scope MetricScope `json:"scope"`
|
||||
Aggregation string `json:"aggregation"`
|
||||
Timestep int `json:"timestep"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
|
||||
Unit Unit `json:"unit"`
|
||||
Energy string `json:"energy"`
|
||||
Name string `json:"name"`
|
||||
Scope MetricScope `json:"scope"`
|
||||
Aggregation string `json:"aggregation"`
|
||||
Footprint string `json:"footprint,omitempty"`
|
||||
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
|
||||
Peak float64 `json:"peak"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
Timestep int `json:"timestep"`
|
||||
Normal float64 `json:"normal"`
|
||||
LowerIsBetter bool `json:"lowerIsBetter"`
|
||||
}
|
||||
|
||||
type Cluster struct {
|
||||
@ -70,14 +79,27 @@ type Cluster struct {
|
||||
SubClusters []*SubCluster `json:"subClusters"`
|
||||
}
|
||||
|
||||
type ClusterSupport struct {
|
||||
Cluster string `json:"cluster"`
|
||||
SubClusters []string `json:"subclusters"`
|
||||
}
|
||||
|
||||
type GlobalMetricListItem struct {
|
||||
Name string `json:"name"`
|
||||
Unit Unit `json:"unit"`
|
||||
Scope MetricScope `json:"scope"`
|
||||
Footprint string `json:"footprint,omitempty"`
|
||||
Availability []ClusterSupport `json:"availability"`
|
||||
}
|
||||
|
||||
// Return a list of socket IDs given a list of hwthread IDs. Even if just one
|
||||
// hwthread is in that socket, add it to the list. If no hwthreads other than
|
||||
// those in the argument list are assigned to one of the sockets in the first
|
||||
// return value, return true as the second value. TODO: Optimize this, there
|
||||
// must be a more efficient way/algorithm.
|
||||
func (topo *Topology) GetSocketsFromHWThreads(
|
||||
hwthreads []int) (sockets []int, exclusive bool) {
|
||||
|
||||
hwthreads []int,
|
||||
) (sockets []int, exclusive bool) {
|
||||
socketsMap := map[int]int{}
|
||||
for _, hwthread := range hwthreads {
|
||||
for socket, hwthreadsInSocket := range topo.Socket {
|
||||
@ -100,14 +122,46 @@ func (topo *Topology) GetSocketsFromHWThreads(
|
||||
return sockets, exclusive
|
||||
}
|
||||
|
||||
// Return a list of socket IDs given a list of core IDs. Even if just one
|
||||
// core is in that socket, add it to the list. If no cores other than
|
||||
// those in the argument list are assigned to one of the sockets in the first
|
||||
// return value, return true as the second value. TODO: Optimize this, there
|
||||
// must be a more efficient way/algorithm.
|
||||
func (topo *Topology) GetSocketsFromCores (
|
||||
cores []int,
|
||||
) (sockets []int, exclusive bool) {
|
||||
socketsMap := map[int]int{}
|
||||
for _, core := range cores {
|
||||
for _, hwthreadInCore := range topo.Core[core] {
|
||||
for socket, hwthreadsInSocket := range topo.Socket {
|
||||
for _, hwthreadInSocket := range hwthreadsInSocket {
|
||||
if hwthreadInCore == hwthreadInSocket {
|
||||
socketsMap[socket] += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
exclusive = true
|
||||
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
|
||||
sockets = make([]int, 0, len(socketsMap))
|
||||
for socket, count := range socketsMap {
|
||||
sockets = append(sockets, socket)
|
||||
exclusive = exclusive && count == hwthreadsPerSocket
|
||||
}
|
||||
|
||||
return sockets, exclusive
|
||||
}
|
||||
|
||||
// Return a list of core IDs given a list of hwthread IDs. Even if just one
|
||||
// hwthread is in that core, add it to the list. If no hwthreads other than
|
||||
// those in the argument list are assigned to one of the cores in the first
|
||||
// return value, return true as the second value. TODO: Optimize this, there
|
||||
// must be a more efficient way/algorithm.
|
||||
func (topo *Topology) GetCoresFromHWThreads(
|
||||
hwthreads []int) (cores []int, exclusive bool) {
|
||||
|
||||
hwthreads []int,
|
||||
) (cores []int, exclusive bool) {
|
||||
coresMap := map[int]int{}
|
||||
for _, hwthread := range hwthreads {
|
||||
for core, hwthreadsInCore := range topo.Core {
|
||||
@ -136,8 +190,8 @@ func (topo *Topology) GetCoresFromHWThreads(
|
||||
// memory domains in the first return value, return true as the second value.
|
||||
// TODO: Optimize this, there must be a more efficient way/algorithm.
|
||||
func (topo *Topology) GetMemoryDomainsFromHWThreads(
|
||||
hwthreads []int) (memDoms []int, exclusive bool) {
|
||||
|
||||
hwthreads []int,
|
||||
) (memDoms []int, exclusive bool) {
|
||||
memDomsMap := map[int]int{}
|
||||
for _, hwthread := range hwthreads {
|
||||
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
|
||||
@ -172,7 +226,17 @@ func (topo *Topology) GetAcceleratorID(id int) (string, error) {
|
||||
}
|
||||
}
|
||||
|
||||
func (topo *Topology) GetAcceleratorIDs() ([]int, error) {
|
||||
// Return list of hardware (string) accelerator IDs
|
||||
func (topo *Topology) GetAcceleratorIDs() []string {
|
||||
accels := make([]string, 0)
|
||||
for _, accel := range topo.Accelerators {
|
||||
accels = append(accels, accel.ID)
|
||||
}
|
||||
return accels
|
||||
}
|
||||
|
||||
// Outdated? Or: Return indices of accelerators in parent array?
|
||||
func (topo *Topology) GetAcceleratorIDsAsInt() ([]int, error) {
|
||||
accels := make([]int, 0)
|
||||
for _, accel := range topo.Accelerators {
|
||||
id, err := strconv.Atoi(accel.ID)
|
||||
|
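GetSocketsFromCores walks core -> hwthread -> socket and reports exclusivity only when the given cores cover every hwthread of each returned socket. A sketch on an invented two-socket topology (field names follow the methods above; the real Topology struct has more fields):

    // 8 hwthreads, 2 sockets, 4 cores with 2 hwthreads each.
    topo := schema.Topology{
    	Node:   []int{0, 1, 2, 3, 4, 5, 6, 7},
    	Socket: [][]int{{0, 1, 2, 3}, {4, 5, 6, 7}},
    	Core:   [][]int{{0, 1}, {2, 3}, {4, 5}, {6, 7}},
    }

    sockets, exclusive := topo.GetSocketsFromCores([]int{0, 1})
    // Cores 0 and 1 contain hwthreads 0-3, i.e. all 4 hwthreads of socket 0:
    // sockets == [0], exclusive == true.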
@ -24,8 +24,9 @@ type LdapConfig struct {
}

type OpenIDConfig struct {
	Provider        string `json:"provider"`
	SyncUserOnLogin bool   `json:"syncUserOnLogin"`
	Provider          string `json:"provider"`
	SyncUserOnLogin   bool   `json:"syncUserOnLogin"`
	UpdateUserOnLogin bool   `json:"updateUserOnLogin"`
}

type JWTAuthConfig struct {
@ -45,6 +46,9 @@ type JWTAuthConfig struct {

	// Should a non-existent user be added to the DB based on the information in the token
	SyncUserOnLogin bool `json:"syncUserOnLogin"`

	// Should an existing user be updated in the DB based on the information in the token
	UpdateUserOnLogin bool `json:"updateUserOnLogin"`
}

type IntRange struct {
@ -53,8 +57,9 @@ type IntRange struct {
}

type TimeRange struct {
	From *time.Time `json:"from"`
	To   *time.Time `json:"to"`
	From  *time.Time `json:"from"`
	To    *time.Time `json:"to"`
	Range string     `json:"range,omitempty"`
}

type FilterRanges struct {
@ -76,12 +81,26 @@ type Retention struct {
	IncludeDB bool `json:"includeDB"`
}

type ResampleConfig struct {
	// Array of resampling target resolutions, in seconds; Example: [600,300,60]
	Resolutions []int `json:"resolutions"`
	// Trigger next zoom level at less than this many visible datapoints
	Trigger int `json:"trigger"`
}

type CronFrequency struct {
	// Duration Update Worker [Defaults to '5m']
	DurationWorker string `json:"duration-worker"`
	// Metric-Footprint Update Worker [Defaults to '10m']
	FootprintWorker string `json:"footprint-worker"`
}

// Format of the configuration (file). See below for the defaults.
type ProgramConfig struct {
	// Address where the http (or https) server will listen on (for example: 'localhost:80').
	Addr string `json:"addr"`

	// Addresses from which secured API endpoints can be reached
	// Addresses from which secured admin API endpoints can be reached, can be wildcard "*"
	ApiAllowedIPs []string `json:"apiAllowedIPs"`

	// Drop root permissions once .env was read and the port was taken.
@ -133,6 +152,9 @@ type ProgramConfig struct {
	// be provided! Most options here can be overwritten by the user.
	UiDefaults map[string]interface{} `json:"ui-defaults"`

	// If it exists, will enable dynamic zoom in frontend metric plots using the configured values
	EnableResampling *ResampleConfig `json:"enable-resampling"`

	// Where to store MachineState files
	MachineStateDir string `json:"machine-state-dir"`

@ -142,6 +164,13 @@ type ProgramConfig struct {
	// Defines time X in seconds in which jobs are considered to be "short" and will be filtered in specific views.
	ShortRunningJobsDuration int `json:"short-running-jobs-duration"`

	// Energy Mix CO2 Emission Constant [g/kWh]
	// If entered, displays estimated CO2 emission for job based on the job's totalEnergy
	EmissionConstant int `json:"emission-constant"`

	// Frequency of cron job workers
	CronFrequency *CronFrequency `json:"cron-frequency"`

	// Array of Clusters
	Clusters []*ClusterConfig `json:"clusters"`
}
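Put together, the new knobs sit at the top level of config.json next to the existing ones; an illustrative fragment built from the JSON tags above (values invented):

    {
      "addr": "localhost:8080",
      "short-running-jobs-duration": 300,
      "emission-constant": 317,
      "enable-resampling": {
        "trigger": 30,
        "resolutions": [600, 300, 120, 60]
      },
      "cron-frequency": {
        "duration-worker": "5m",
        "footprint-worker": "10m"
      }
    }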
@ -16,30 +16,33 @@ import (
// Common subset of Job and JobMeta. Use one of those, not this type directly.

type BaseJob struct {
	// The unique identifier of a job
	JobID            int64             `json:"jobId" db:"job_id" example:"123000"`
	User             string            `json:"user" db:"user" example:"abcd100h"`         // The unique identifier of a user
	Project          string            `json:"project" db:"project" example:"abcd200"`    // The unique identifier of a project
	Cluster          string            `json:"cluster" db:"cluster" example:"fritz"`      // The unique identifier of a cluster
	SubCluster       string            `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster
	Partition        string            `json:"partition,omitempty" db:"partition" example:"main"`       // The Slurm partition to which the job was submitted
	ArrayJobId       int64             `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"` // The unique identifier of an array job
	NumNodes         int32             `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`         // Number of nodes used (Min > 0)
	// NumCores      int32             `json:"numCores" db:"num_cores" example:"20" minimum:"1"`        // Number of HWThreads used (Min > 0)
	NumHWThreads     int32             `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
	NumAcc           int32             `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`              // Number of accelerators used (Min > 0)
	Exclusive        int32             `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`       // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
	MonitoringStatus int32             `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successful
	SMT              int32             `json:"smt,omitempty" db:"smt" example:"4"` // SMT threads used by job
	State            JobState          `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job
	Duration         int32             `json:"duration" db:"duration" example:"43200" minimum:"1"`           // Duration of job in seconds (Min > 0)
	Walltime         int64             `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0)
	Tags             []*Tag            `json:"tags,omitempty"`   // List of tags
	RawResources     []byte            `json:"-" db:"resources"` // Resources used by job [As Bytes]
	Resources        []*Resource       `json:"resources"`        // Resources used by job
	RawMetaData      []byte            `json:"-" db:"meta_data"` // Additional information about the job [As Bytes]
	MetaData         map[string]string `json:"metaData"`         // Additional information about the job
	ConcurrentJobs   JobLinkResultList `json:"concurrentJobs"`
	Cluster            string             `json:"cluster" db:"cluster" example:"fritz"`
	SubCluster         string             `json:"subCluster" db:"subcluster" example:"main"`
	Partition          string             `json:"partition,omitempty" db:"cluster_partition" example:"main"`
	Project            string             `json:"project" db:"project" example:"abcd200"`
	User               string             `json:"user" db:"hpc_user" example:"abcd100h"`
	State              JobState           `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
	Tags               []*Tag             `json:"tags,omitempty"`
	RawEnergyFootprint []byte             `json:"-" db:"energy_footprint"`
	RawFootprint       []byte             `json:"-" db:"footprint"`
	RawMetaData        []byte             `json:"-" db:"meta_data"`
	RawResources       []byte             `json:"-" db:"resources"`
	Resources          []*Resource        `json:"resources"`
	EnergyFootprint    map[string]float64 `json:"energyFootprint"`
	Footprint          map[string]float64 `json:"footprint"`
	MetaData           map[string]string  `json:"metaData"`
	ConcurrentJobs     JobLinkResultList  `json:"concurrentJobs"`
	Energy             float64            `json:"energy" db:"energy"`
	ArrayJobId         int64              `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
	Walltime           int64              `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
	JobID              int64              `json:"jobId" db:"job_id" example:"123000"`
	Duration           int32              `json:"duration" db:"duration" example:"43200" minimum:"1"`
	SMT                int32              `json:"smt,omitempty" db:"smt" example:"4"`
	MonitoringStatus   int32              `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
	Exclusive          int32              `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
	NumAcc             int32              `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
	NumHWThreads       int32              `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
	NumNodes           int32              `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
}

// Job struct type
@ -49,19 +52,10 @@ type BaseJob struct {
// Job model
// @Description Information of a HPC job.
type Job struct {
	// The unique identifier of a job in the database
	ID int64 `json:"id" db:"id"`
	StartTime time.Time `json:"startTime"`
	BaseJob
	StartTimeUnix    int64     `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
	StartTime        time.Time `json:"startTime"`                              // Start time as 'time.Time' data type
	MemUsedMax       float64   `json:"memUsedMax" db:"mem_used_max"`           // MemUsedMax as Float64
	FlopsAnyAvg      float64   `json:"flopsAnyAvg" db:"flops_any_avg"`         // FlopsAnyAvg as Float64
	MemBwAvg         float64   `json:"memBwAvg" db:"mem_bw_avg"`               // MemBwAvg as Float64
	LoadAvg          float64   `json:"loadAvg" db:"load_avg"`                  // LoadAvg as Float64
	NetBwAvg         float64   `json:"-" db:"net_bw_avg"`                      // NetBwAvg as Float64
	NetDataVolTotal  float64   `json:"-" db:"net_data_vol_total"`              // NetDataVolTotal as Float64
	FileBwAvg        float64   `json:"-" db:"file_bw_avg"`                     // FileBwAvg as Float64
	FileDataVolTotal float64   `json:"-" db:"file_data_vol_total"`             // FileDataVolTotal as Float64
	ID            int64 `json:"id" db:"id"`
	StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"`
}

// JobMeta struct type
@ -88,11 +82,10 @@ type JobLinkResultList struct {
// JobMeta model
// @Description Meta data information of a HPC job.
type JobMeta struct {
	// The unique identifier of a job in the database
	ID *int64 `json:"id,omitempty"`
	ID         *int64                   `json:"id,omitempty"`
	Statistics map[string]JobStatistics `json:"statistics"`
	BaseJob
	StartTime  int64                    `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` // Start epoch time stamp in seconds (Min > 0)
	Statistics map[string]JobStatistics `json:"statistics"`                                                 // Metric statistics of job
	StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"`
}

const (
@ -124,18 +117,19 @@ type JobStatistics struct {
// Tag model
// @Description Defines a tag using name and type.
type Tag struct {
	ID   int64  `json:"id" db:"id"`                           // The unique DB identifier of a tag
	Type string `json:"type" db:"tag_type" example:"Debug"`   // Tag Type
	Name string `json:"name" db:"tag_name" example:"Testjob"` // Tag Name
	Type  string `json:"type" db:"tag_type" example:"Debug"`
	Name  string `json:"name" db:"tag_name" example:"Testjob"`
	Scope string `json:"scope" db:"tag_scope" example:"global"`
	ID    int64  `json:"id" db:"id"`
}

// Resource model
// @Description A resource used by a job
type Resource struct {
	Hostname      string   `json:"hostname"`                // Name of the host (= node)
	HWThreads     []int    `json:"hwthreads,omitempty"`     // List of OS processor ids
	Accelerators  []string `json:"accelerators,omitempty"`  // List of accelerator device ids
	Configuration string   `json:"configuration,omitempty"` // The configuration options of the node
	Hostname      string   `json:"hostname"`
	Configuration string   `json:"configuration,omitempty"`
	HWThreads     []int    `json:"hwthreads,omitempty"`
	Accelerators  []string `json:"accelerators,omitempty"`
}

type JobState string
@ -10,22 +10,31 @@ import (
|
||||
"math"
|
||||
"sort"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
)
|
||||
|
||||
type JobData map[string]map[MetricScope]*JobMetric
|
||||
type ScopedJobStats map[string]map[MetricScope][]*ScopedStats
|
||||
|
||||
type JobMetric struct {
|
||||
Unit Unit `json:"unit"`
|
||||
Timestep int `json:"timestep"`
|
||||
Series []Series `json:"series"`
|
||||
StatisticsSeries *StatsSeries `json:"statisticsSeries,omitempty"`
|
||||
Unit Unit `json:"unit"`
|
||||
Series []Series `json:"series"`
|
||||
Timestep int `json:"timestep"`
|
||||
}
|
||||
|
||||
type Series struct {
|
||||
Hostname string `json:"hostname"`
|
||||
Id *string `json:"id,omitempty"`
|
||||
Statistics MetricStatistics `json:"statistics"`
|
||||
Hostname string `json:"hostname"`
|
||||
Data []Float `json:"data"`
|
||||
Statistics MetricStatistics `json:"statistics"`
|
||||
}
|
||||
|
||||
type ScopedStats struct {
|
||||
Hostname string `json:"hostname"`
|
||||
Id *string `json:"id,omitempty"`
|
||||
Data *MetricStatistics `json:"data"`
|
||||
}
|
||||
|
||||
type MetricStatistics struct {
|
||||
@ -35,10 +44,11 @@ type MetricStatistics struct {
|
||||
}
|
||||
|
||||
type StatsSeries struct {
|
||||
Percentiles map[int][]Float `json:"percentiles,omitempty"`
|
||||
Mean []Float `json:"mean"`
|
||||
Median []Float `json:"median"`
|
||||
Min []Float `json:"min"`
|
||||
Max []Float `json:"max"`
|
||||
Percentiles map[int][]Float `json:"percentiles,omitempty"`
|
||||
}
|
||||
|
||||
type MetricScope string
|
||||
@ -121,6 +131,7 @@ func (jd *JobData) Size() int {
|
||||
if metric.StatisticsSeries != nil {
|
||||
n += len(metric.StatisticsSeries.Max)
|
||||
n += len(metric.StatisticsSeries.Mean)
|
||||
n += len(metric.StatisticsSeries.Median)
|
||||
n += len(metric.StatisticsSeries.Min)
|
||||
}
|
||||
|
||||
@@ -149,53 +160,74 @@ func (jm *JobMetric) AddStatisticsSeries() {
 		}
 	}

-	min, mean, max := make([]Float, n), make([]Float, n), make([]Float, n)
+	// mean := make([]Float, n)
+	min, median, max := make([]Float, n), make([]Float, n), make([]Float, n)
 	i := 0
 	for ; i < m; i++ {
-		smin, ssum, smax := math.MaxFloat32, 0.0, -math.MaxFloat32
+		seriesCount := len(jm.Series)
+		// ssum := 0.0
+		smin, smed, smax := math.MaxFloat32, make([]float64, seriesCount), -math.MaxFloat32
 		notnan := 0
-		for j := 0; j < len(jm.Series); j++ {
+		for j := 0; j < seriesCount; j++ {
 			x := float64(jm.Series[j].Data[i])
 			if math.IsNaN(x) {
 				continue
 			}

 			notnan += 1
-			ssum += x
+			// ssum += x
+			smed[j] = x
 			smin = math.Min(smin, x)
 			smax = math.Max(smax, x)
 		}

 		if notnan < 3 {
 			min[i] = NaN
-			mean[i] = NaN
+			// mean[i] = NaN
+			median[i] = NaN
 			max[i] = NaN
 		} else {
 			min[i] = Float(smin)
-			mean[i] = Float(ssum / float64(notnan))
+			// mean[i] = Float(ssum / float64(notnan))
 			max[i] = Float(smax)
+
+			medianRaw, err := util.Median(smed)
+			if err != nil {
+				median[i] = NaN
+			} else {
+				median[i] = Float(medianRaw)
+			}
 		}
 	}

 	for ; i < n; i++ {
 		min[i] = NaN
-		mean[i] = NaN
+		// mean[i] = NaN
+		median[i] = NaN
 		max[i] = NaN
 	}

 	if smooth {
-		for i := 2; i < len(mean)-2; i++ {
+		for i := 2; i < len(median)-2; i++ {
 			if min[i].IsNaN() {
 				continue
 			}

 			min[i] = (min[i-2] + min[i-1] + min[i] + min[i+1] + min[i+2]) / 5
 			max[i] = (max[i-2] + max[i-1] + max[i] + max[i+1] + max[i+2]) / 5
-			mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
+			// mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
+			// Reduce Median further
+			smoothRaw := []float64{float64(median[i-2]), float64(median[i-1]), float64(median[i]), float64(median[i+1]), float64(median[i+2])}
+			smoothMedian, err := util.Median(smoothRaw)
+			if err != nil {
+				median[i] = NaN
+			} else {
+				median[i] = Float(smoothMedian)
+			}
 		}
 	}

-	jm.StatisticsSeries = &StatsSeries{Mean: mean, Min: min, Max: max}
+	jm.StatisticsSeries = &StatsSeries{Median: median, Min: min, Max: max} // Mean: mean
 }

 func (jd *JobData) AddNodeScope(metric string) bool {
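
Editor's sketch: the hunk above swaps the running mean in the statistics series for a NaN-aware median. A minimal, self-contained Go illustration of that per-timestep logic, assuming util.Median (its implementation is not shown in this diff) sorts a copy of its input and errors on empty input; all names and values here are illustrative only.

package main

import (
	"errors"
	"fmt"
	"math"
	"sort"
)

// median mirrors what a util.Median-style helper plausibly does.
func median(data []float64) (float64, error) {
	if len(data) == 0 {
		return 0, errors.New("median: empty input")
	}
	vals := append([]float64(nil), data...) // sort a copy, keep the caller's slice intact
	sort.Float64s(vals)
	mid := len(vals) / 2
	if len(vals)%2 == 1 {
		return vals[mid], nil
	}
	return (vals[mid-1] + vals[mid]) / 2, nil
}

func main() {
	// One timestep across four series; the NaN is skipped, as in the hunk above.
	xs := []float64{3.0, 1.0, math.NaN(), 2.0}
	var clean []float64
	for _, x := range xs {
		if !math.IsNaN(x) {
			clean = append(clean, x)
		}
	}
	m, err := median(clean)
	if err != nil {
		fmt.Println("no data")
		return
	}
	fmt.Println(m) // prints 2
}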
@@ -204,7 +236,7 @@ func (jd *JobData) AddNodeScope(metric string) bool {
 		return false
 	}

-	var maxScope MetricScope = MetricScopeInvalid
+	maxScope := MetricScopeInvalid
 	for scope := range scopes {
 		maxScope = maxScope.Max(scope)
 	}
@@ -266,6 +298,21 @@ func (jd *JobData) AddNodeScope(metric string) bool {
 	return true
 }

+func (jd *JobData) RoundMetricStats() {
+	// TODO: Make Digit-Precision Configurable? (Currently: Fixed to 2 Digits)
+	for _, scopes := range *jd {
+		for _, jm := range scopes {
+			for index := range jm.Series {
+				jm.Series[index].Statistics = MetricStatistics{
+					Avg: (math.Round(jm.Series[index].Statistics.Avg*100) / 100),
+					Min: (math.Round(jm.Series[index].Statistics.Min*100) / 100),
+					Max: (math.Round(jm.Series[index].Statistics.Max*100) / 100),
+				}
+			}
+		}
+	}
+}
+
 func (jm *JobMetric) AddPercentiles(ps []int) bool {
 	if jm.StatisticsSeries == nil {
 		jm.AddStatisticsSeries()
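
Editor's sketch: RoundMetricStats, added above, pins statistics to two decimals with the scale-and-round idiom. A tiny standalone Go illustration, with values chosen to avoid binary-representation edge cases.

package main

import (
	"fmt"
	"math"
)

// round2 fixes a float to two decimal places, as RoundMetricStats
// does for Avg, Min, and Max.
func round2(x float64) float64 { return math.Round(x*100) / 100 }

func main() {
	fmt.Println(round2(3.14159), round2(0.125)) // 3.14 0.13
}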
@@ -1,284 +1,339 @@
 {
-    "$schema": "http://json-schema.org/draft/2020-12/schema",
-    "$id": "embedfs://cluster.schema.json",
-    "title": "HPC cluster description",
-    "description": "Meta data information of a HPC cluster",
-    "type": "object",
-    "properties": {
-        "name": { "description": "The unique identifier of a cluster", "type": "string" },
-        "metricConfig": {
-            "description": "Metric specifications",
-            "type": "array",
-            "items": {
-                "type": "object",
-                "properties": {
-                    "name": { "description": "Metric name", "type": "string" },
-                    "unit": { "description": "Metric unit", "$ref": "embedfs://unit.schema.json" },
-                    "scope": { "description": "Native measurement resolution", "type": "string" },
-                    "timestep": { "description": "Frequency of timeseries points", "type": "integer" },
-                    "aggregation": { "description": "How the metric is aggregated", "type": "string", "enum": ["sum", "avg"] },
-                    "peak": { "description": "Metric peak threshold (Upper metric limit)", "type": "number" },
-                    "normal": { "description": "Metric normal threshold", "type": "number" },
-                    "caution": { "description": "Metric caution threshold (Suspicious but does not require immediate action)", "type": "number" },
-                    "alert": { "description": "Metric alert threshold (Requires immediate action)", "type": "number" },
-                    "subClusters": {
-                        "description": "Array of cluster hardware partition metric thresholds",
-                        "type": "array",
-                        "items": {
-                            "type": "object",
-                            "properties": {
-                                "name": { "description": "Hardware partition name", "type": "string" },
-                                "peak": { "type": "number" },
-                                "normal": { "type": "number" },
-                                "caution": { "type": "number" },
-                                "alert": { "type": "number" },
-                                "remove": { "type": "boolean" }
-                            },
-                            "required": ["name"]
-                        }
-                    }
-                },
-                "required": ["name", "unit", "scope", "timestep", "aggregation", "peak", "normal", "caution", "alert"]
-            },
-            "minItems": 1
-        },
-        "subClusters": {
-            "description": "Array of cluster hardware partitions",
-            "type": "array",
-            "items": {
-                "type": "object",
-                "properties": {
-                    "name": { "description": "Hardware partition name", "type": "string" },
-                    "processorType": { "description": "Processor type", "type": "string" },
-                    "socketsPerNode": { "description": "Number of sockets per node", "type": "integer" },
-                    "coresPerSocket": { "description": "Number of cores per socket", "type": "integer" },
-                    "threadsPerCore": { "description": "Number of SMT threads per core", "type": "integer" },
-                    "flopRateScalar": {
-                        "description": "Theoretical node peak flop rate for scalar code in GFlops/s",
-                        "type": "object",
-                        "properties": {
-                            "unit": { "description": "Metric unit", "$ref": "embedfs://unit.schema.json" },
-                            "value": { "type": "number" }
-                        }
-                    },
-                    "flopRateSimd": {
-                        "description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
-                        "type": "object",
-                        "properties": {
-                            "unit": { "description": "Metric unit", "$ref": "embedfs://unit.schema.json" },
-                            "value": { "type": "number" }
-                        }
-                    },
-                    "memoryBandwidth": {
-                        "description": "Theoretical node peak memory bandwidth in GB/s",
-                        "type": "object",
-                        "properties": {
-                            "unit": { "description": "Metric unit", "$ref": "embedfs://unit.schema.json" },
-                            "value": { "type": "number" }
-                        }
-                    },
-                    "nodes": { "description": "Node list expression", "type": "string" },
-                    "topology": {
-                        "description": "Node topology",
-                        "type": "object",
-                        "properties": {
-                            "node": { "description": "HwThread lists of node", "type": "array", "items": { "type": "integer" } },
-                            "socket": { "description": "HwThread lists of sockets", "type": "array", "items": { "type": "array", "items": { "type": "integer" } } },
-                            "memoryDomain": { "description": "HwThread lists of memory domains", "type": "array", "items": { "type": "array", "items": { "type": "integer" } } },
-                            "die": { "description": "HwThread lists of dies", "type": "array", "items": { "type": "array", "items": { "type": "integer" } } },
-                            "core": { "description": "HwThread lists of cores", "type": "array", "items": { "type": "array", "items": { "type": "integer" } } },
-                            "accelerators": {
-                                "type": "array",
-                                "description": "List of accelerator devices",
-                                "items": {
-                                    "type": "object",
-                                    "properties": {
-                                        "id": { "type": "string", "description": "The unique device id" },
-                                        "type": { "type": "string", "description": "The accelerator type", "enum": ["Nvidia GPU", "AMD GPU", "Intel GPU"] },
-                                        "model": { "type": "string", "description": "The accelerator model" }
-                                    },
-                                    "required": ["id", "type", "model"]
-                                }
-                            }
-                        },
-                        "required": ["node", "socket", "memoryDomain"]
-                    }
-                },
-                "required": ["name", "nodes", "topology", "processorType", "socketsPerNode", "coresPerSocket", "threadsPerCore", "flopRateScalar", "flopRateSimd", "memoryBandwidth"]
-            },
-            "minItems": 1
-        }
-    },
-    "required": ["name", "metricConfig", "subClusters"]
+    "$schema": "http://json-schema.org/draft/2020-12/schema",
+    "$id": "embedfs://cluster.schema.json",
+    "title": "HPC cluster description",
+    "description": "Meta data information of a HPC cluster",
+    "type": "object",
+    "properties": {
+        "name": { "description": "The unique identifier of a cluster", "type": "string" },
+        "metricConfig": {
+            "description": "Metric specifications",
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": { "description": "Metric name", "type": "string" },
+                    "unit": { "description": "Metric unit", "$ref": "embedfs://unit.schema.json" },
+                    "scope": { "description": "Native measurement resolution", "type": "string", "enum": ["node", "socket", "memoryDomain", "core", "hwthread", "accelerator"] },
+                    "timestep": { "description": "Frequency of timeseries points in seconds", "type": "integer" },
+                    "aggregation": { "description": "How the metric is aggregated", "type": "string", "enum": ["sum", "avg"] },
+                    "footprint": { "description": "Is it a footprint metric and what type", "type": "string", "enum": ["avg", "max", "min"] },
+                    "energy": { "description": "Is it used to calculate job energy", "type": "string", "enum": ["power", "energy"] },
+                    "lowerIsBetter": { "description": "Is lower better.", "type": "boolean" },
+                    "peak": { "description": "Metric peak threshold (Upper metric limit)", "type": "number" },
+                    "normal": { "description": "Metric normal threshold", "type": "number" },
+                    "caution": { "description": "Metric caution threshold (Suspicious but does not require immediate action)", "type": "number" },
+                    "alert": { "description": "Metric alert threshold (Requires immediate action)", "type": "number" },
+                    "subClusters": {
+                        "description": "Array of cluster hardware partition metric thresholds",
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "name": { "description": "Hardware partition name", "type": "string" },
+                                "footprint": { "description": "Is it a footprint metric and what type. Overwrite global setting", "type": "string", "enum": ["avg", "max", "min"] },
+                                "energy": { "description": "Is it used to calculate job energy. Overwrite global", "type": "string", "enum": ["power", "energy"] },
+                                "lowerIsBetter": { "description": "Is lower better. Overwrite global", "type": "boolean" },
+                                "peak": { "description": "The maximum possible metric value", "type": "number" },
+                                "normal": { "description": "A common metric value level", "type": "number" },
+                                "caution": { "description": "Metric value requires attention", "type": "number" },
+                                "alert": { "description": "Metric value requiring immediate attention", "type": "number" },
+                                "remove": { "description": "Remove this metric for this subcluster", "type": "boolean" }
+                            },
+                            "required": ["name"]
+                        }
+                    }
+                },
+                "required": ["name", "unit", "scope", "timestep", "aggregation", "peak", "normal", "caution", "alert"]
+            },
+            "minItems": 1
+        },
+        "subClusters": {
+            "description": "Array of cluster hardware partitions",
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": { "description": "Hardware partition name", "type": "string" },
+                    "processorType": { "description": "Processor type", "type": "string" },
+                    "socketsPerNode": { "description": "Number of sockets per node", "type": "integer" },
+                    "coresPerSocket": { "description": "Number of cores per socket", "type": "integer" },
+                    "threadsPerCore": { "description": "Number of SMT threads per core", "type": "integer" },
+                    "flopRateScalar": {
+                        "description": "Theoretical node peak flop rate for scalar code in GFlops/s",
+                        "type": "object",
+                        "properties": {
+                            "unit": { "description": "Metric unit", "$ref": "embedfs://unit.schema.json" },
+                            "value": { "type": "number" }
+                        }
+                    },
+                    "flopRateSimd": {
+                        "description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
+                        "type": "object",
+                        "properties": {
+                            "unit": { "description": "Metric unit", "$ref": "embedfs://unit.schema.json" },
+                            "value": { "type": "number" }
+                        }
+                    },
+                    "memoryBandwidth": {
+                        "description": "Theoretical node peak memory bandwidth in GB/s",
+                        "type": "object",
+                        "properties": {
+                            "unit": { "description": "Metric unit", "$ref": "embedfs://unit.schema.json" },
+                            "value": { "type": "number" }
+                        }
+                    },
+                    "nodes": { "description": "Node list expression", "type": "string" },
+                    "topology": {
+                        "description": "Node topology",
+                        "type": "object",
+                        "properties": {
+                            "node": { "description": "HwThread lists of node", "type": "array", "items": { "type": "integer" } },
+                            "socket": { "description": "HwThread lists of sockets", "type": "array", "items": { "type": "array", "items": { "type": "integer" } } },
+                            "memoryDomain": { "description": "HwThread lists of memory domains", "type": "array", "items": { "type": "array", "items": { "type": "integer" } } },
+                            "die": { "description": "HwThread lists of dies", "type": "array", "items": { "type": "array", "items": { "type": "integer" } } },
+                            "core": { "description": "HwThread lists of cores", "type": "array", "items": { "type": "array", "items": { "type": "integer" } } },
+                            "accelerators": {
+                                "type": "array",
+                                "description": "List of accelerator devices",
+                                "items": {
+                                    "type": "object",
+                                    "properties": {
+                                        "id": { "type": "string", "description": "The unique device id" },
+                                        "type": { "type": "string", "description": "The accelerator type", "enum": ["Nvidia GPU", "AMD GPU", "Intel GPU"] },
+                                        "model": { "type": "string", "description": "The accelerator model" }
+                                    },
+                                    "required": ["id", "type", "model"]
+                                }
+                            }
+                        },
+                        "required": ["node", "socket", "memoryDomain"]
+                    }
+                },
+                "required": ["name", "nodes", "topology", "processorType", "socketsPerNode", "coresPerSocket", "threadsPerCore", "flopRateScalar", "flopRateSimd", "memoryBandwidth"]
+            },
+            "minItems": 1
+        }
+    },
+    "required": ["name", "metricConfig", "subClusters"]
 }
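
Editor's sketch: for orientation, a minimal, hypothetical cluster.json instance that would validate against the new schema above. The shape of the "unit" objects is an assumption, since unit.schema.json is not part of this diff, and every name and number is invented.

{
    "name": "testcluster",
    "metricConfig": [
        {
            "name": "flops_any",
            "unit": { "base": "F/s", "prefix": "G" },
            "scope": "hwthread",
            "timestep": 60,
            "aggregation": "avg",
            "peak": 5600.0,
            "normal": 1000.0,
            "caution": 100.0,
            "alert": 50.0
        }
    ],
    "subClusters": [
        {
            "name": "main",
            "processorType": "Intel Xeon",
            "socketsPerNode": 2,
            "coresPerSocket": 4,
            "threadsPerCore": 1,
            "flopRateScalar": { "unit": { "base": "F/s", "prefix": "G" }, "value": 432 },
            "flopRateSimd": { "unit": { "base": "F/s", "prefix": "G" }, "value": 9216 },
            "memoryBandwidth": { "unit": { "base": "B/s", "prefix": "G" }, "value": 350 },
            "nodes": "node[001-008]",
            "topology": {
                "node": [0, 1, 2, 3, 4, 5, 6, 7],
                "socket": [[0, 1, 2, 3], [4, 5, 6, 7]],
                "memoryDomain": [[0, 1, 2, 3], [4, 5, 6, 7]]
            }
        }
    ]
}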
@@ -1,433 +1,498 @@
 {
-    "$schema": "http://json-schema.org/draft/2020-12/schema",
-    "$id": "embedfs://config.schema.json",
-    "title": "cc-backend configuration file schema",
-    "type": "object",
-    "properties": {
-        "addr": { "description": "Address where the http (or https) server will listen on (for example: 'localhost:80').", "type": "string" },
-        "user": { "description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.", "type": "string" },
-        "group": { "description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.", "type": "string" },
-        "disable-authentication": { "description": "Disable authentication (for everything: API, Web-UI, ...).", "type": "boolean" },
-        "embed-static-files": { "description": "If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not.", "type": "boolean" },
-        "static-files": { "description": "Folder where static assets can be found, if embed-static-files is false.", "type": "string" },
-        "db-driver": { "description": "sqlite3 or mysql (mysql will work for mariadb as well).", "type": "string", "enum": ["sqlite3", "mysql"] },
-        "db": { "description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).", "type": "string" },
-        "job-archive": {
-            "description": "Configuration keys for job-archive",
-            "type": "object",
-            "properties": {
-                "kind": { "description": "Backend type for job-archive", "type": "string", "enum": ["file", "s3"] },
-                "path": { "description": "Path to job archive for file backend", "type": "string" },
-                "compression": { "description": "Setup automatic compression for jobs older than number of days", "type": "integer" },
-                "retention": {
-                    "description": "Configuration keys for retention",
-                    "type": "object",
-                    "properties": {
-                        "policy": { "description": "Retention policy", "type": "string", "enum": ["none", "delete", "move"] },
-                        "includeDB": { "description": "Also remove jobs from database", "type": "boolean" },
-                        "age": { "description": "Act on jobs with startTime older than age (in days)", "type": "integer" },
-                        "location": { "description": "The target directory for retention. Only applicable for retention move.", "type": "string" }
-                    },
-                    "required": ["policy"]
-                }
-            },
-            "required": ["kind"]
-        },
-        "disable-archive": { "description": "Keep all metric data in the metric data repositories, do not write to the job-archive.", "type": "boolean" },
-        "validate": { "description": "Validate all input json documents against json schema.", "type": "boolean" },
-        "session-max-age": { "description": "Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire!", "type": "string" },
-        "https-cert-file": { "description": "Filepath to SSL certificate. If also https-key-file is set use HTTPS using those certificates.", "type": "string" },
-        "https-key-file": { "description": "Filepath to SSL key file. If also https-cert-file is set use HTTPS using those certificates.", "type": "string" },
-        "redirect-http-to": { "description": "If not the empty string and addr does not end in :80, redirect every request incoming at port 80 to that url.", "type": "string" },
-        "stop-jobs-exceeding-walltime": { "description": "If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job.", "type": "integer" },
-        "short-running-jobs-duration": { "description": "Do not show running jobs shorter than X seconds.", "type": "integer" },
-        "jwts": {
-            "description": "For JWT token authentication.",
-            "type": "object",
-            "properties": {
-                "max-age": { "description": "Configure how long a token is valid. As string parsable by time.ParseDuration()", "type": "string" },
-                "cookieName": { "description": "Cookie that should be checked for a JWT token.", "type": "string" },
-                "validateUser": { "description": "Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.", "type": "boolean" },
-                "trustedIssuer": { "description": "Issuer that should be accepted when validating external JWTs", "type": "string" },
-                "syncUserOnLogin": { "description": "Add non-existent user to DB at login attempt with values provided in JWT.", "type": "boolean" }
-            },
-            "required": ["max-age"]
-        },
-        "ldap": {
-            "description": "For LDAP Authentication and user synchronisation.",
-            "type": "object",
-            "properties": {
-                "url": { "description": "URL of LDAP directory server.", "type": "string" },
-                "user_base": { "description": "Base DN of user tree root.", "type": "string" },
-                "search_dn": { "description": "DN for authenticating LDAP admin account with general read rights.", "type": "string" },
-                "user_bind": { "description": "Expression used to authenticate users via LDAP bind. Must contain uid={username}.", "type": "string" },
-                "user_filter": { "description": "Filter to extract users for syncing.", "type": "string" },
-                "username_attr": { "description": "Attribute with full username. Default: gecos", "type": "string" },
-                "sync_interval": { "description": "Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.", "type": "string" },
-                "sync_del_old_users": { "description": "Delete obsolete users in database.", "type": "boolean" },
-                "syncUserOnLogin": { "description": "Add non-existent user to DB at login attempt if user exists in LDAP directory", "type": "boolean" }
-            },
-            "required": ["url", "user_base", "search_dn", "user_bind", "user_filter"]
-        },
-        "clusters": {
-            "description": "Configuration for the clusters to be displayed.",
-            "type": "array",
-            "items": {
-                "type": "object",
-                "properties": {
-                    "name": { "description": "The name of the cluster.", "type": "string" },
-                    "metricDataRepository": {
-                        "description": "Type of the metric data repository for this cluster",
-                        "type": "object",
-                        "properties": {
-                            "kind": { "type": "string", "enum": ["influxdb", "prometheus", "cc-metric-store", "test"] },
-                            "url": { "type": "string" },
-                            "token": { "type": "string" }
-                        },
-                        "required": ["kind", "url"]
-                    },
-                    "filterRanges": {
-                        "description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
-                        "type": "object",
-                        "properties": {
-                            "numNodes": { "description": "UI slider range for number of nodes", "type": "object", "properties": { "from": { "type": "integer" }, "to": { "type": "integer" } }, "required": ["from", "to"] },
-                            "duration": { "description": "UI slider range for duration", "type": "object", "properties": { "from": { "type": "integer" }, "to": { "type": "integer" } }, "required": ["from", "to"] },
-                            "startTime": { "description": "UI slider range for start time", "type": "object", "properties": { "from": { "type": "string", "format": "date-time" }, "to": { "type": "null" } }, "required": ["from", "to"] }
-                        },
-                        "required": ["numNodes", "duration", "startTime"]
-                    }
-                },
-                "required": ["name", "metricDataRepository", "filterRanges"]
-            },
-            "minItems": 1
-        },
-        "ui-defaults": {
-            "description": "Default configuration for web UI",
-            "type": "object",
-            "properties": {
-                "plot_general_colorBackground": { "description": "Color plot background according to job average threshold limits", "type": "boolean" },
-                "plot_general_lineWidth": { "description": "Initial linewidth", "type": "integer" },
-                "plot_list_jobsPerPage": { "description": "Jobs shown per page in job lists", "type": "integer" },
-                "plot_view_plotsPerRow": { "description": "Number of plots per row in single job view", "type": "integer" },
-                "plot_view_showPolarplot": { "description": "Option to toggle polar plot in single job view", "type": "boolean" },
-                "plot_view_showRoofline": { "description": "Option to toggle roofline plot in single job view", "type": "boolean" },
-                "plot_view_showStatTable": { "description": "Option to toggle the node statistic table in single job view", "type": "boolean" },
-                "system_view_selectedMetric": { "description": "Initial metric shown in system view", "type": "string" },
-                "analysis_view_histogramMetrics": { "description": "Metrics to show as job count histograms in analysis view", "type": "array", "items": { "type": "string", "minItems": 1 } },
-                "analysis_view_scatterPlotMetrics": { "description": "Initial scatter plot configuration in analysis view", "type": "array", "items": { "type": "array", "items": { "type": "string", "minItems": 2, "maxItems": 2 }, "minItems": 1 } },
-                "job_view_nodestats_selectedMetrics": { "description": "Initial metrics shown in node statistics table of single job view", "type": "array", "items": { "type": "string", "minItems": 1 } },
-                "job_view_polarPlotMetrics": { "description": "Metrics shown in polar plot of single job view", "type": "array", "items": { "type": "string", "minItems": 1 } },
-                "job_view_selectedMetrics": { "description": "", "type": "array", "items": { "type": "string", "minItems": 1 } },
-                "plot_general_colorscheme": { "description": "Initial color scheme", "type": "array", "items": { "type": "string", "minItems": 1 } },
-                "plot_list_selectedMetrics": { "description": "Initial metric plots shown in jobs lists", "type": "array", "items": { "type": "string", "minItems": 1 } }
-            },
-            "required": ["plot_general_colorBackground", "plot_general_lineWidth", "plot_list_jobsPerPage", "plot_view_plotsPerRow", "plot_view_showPolarplot", "plot_view_showRoofline", "plot_view_showStatTable", "system_view_selectedMetric", "analysis_view_histogramMetrics", "analysis_view_scatterPlotMetrics", "job_view_nodestats_selectedMetrics", "job_view_polarPlotMetrics", "job_view_selectedMetrics", "plot_general_colorscheme", "plot_list_selectedMetrics"]
-        }
-    },
-    "required": ["jwts", "clusters"]
+    "$schema": "http://json-schema.org/draft/2020-12/schema",
+    "$id": "embedfs://config.schema.json",
+    "title": "cc-backend configuration file schema",
+    "type": "object",
+    "properties": {
+        "addr": { "description": "Address where the http (or https) server will listen on (for example: 'localhost:80').", "type": "string" },
+        "apiAllowedIPs": { "description": "Addresses from which secured API endpoints can be reached", "type": "array", "items": { "type": "string" } },
+        "user": { "description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.", "type": "string" },
+        "group": { "description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.", "type": "string" },
+        "disable-authentication": { "description": "Disable authentication (for everything: API, Web-UI, ...).", "type": "boolean" },
+        "embed-static-files": { "description": "If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not.", "type": "boolean" },
+        "static-files": { "description": "Folder where static assets can be found, if embed-static-files is false.", "type": "string" },
+        "db-driver": { "description": "sqlite3 or mysql (mysql will work for mariadb as well).", "type": "string", "enum": ["sqlite3", "mysql"] },
+        "db": { "description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).", "type": "string" },
+        "archive": {
+            "description": "Configuration keys for job-archive",
+            "type": "object",
+            "properties": {
+                "kind": { "description": "Backend type for job-archive", "type": "string", "enum": ["file", "s3"] },
+                "path": { "description": "Path to job archive for file backend", "type": "string" },
+                "compression": { "description": "Setup automatic compression for jobs older than number of days", "type": "integer" },
+                "retention": {
+                    "description": "Configuration keys for retention",
+                    "type": "object",
+                    "properties": {
+                        "policy": { "description": "Retention policy", "type": "string", "enum": ["none", "delete", "move"] },
+                        "includeDB": { "description": "Also remove jobs from database", "type": "boolean" },
+                        "age": { "description": "Act on jobs with startTime older than age (in days)", "type": "integer" },
+                        "location": { "description": "The target directory for retention. Only applicable for retention move.", "type": "string" }
+                    },
+                    "required": ["policy"]
+                }
+            },
+            "required": ["kind"]
+        },
+        "disable-archive": { "description": "Keep all metric data in the metric data repositories, do not write to the job-archive.", "type": "boolean" },
+        "validate": { "description": "Validate all input json documents against json schema.", "type": "boolean" },
+        "session-max-age": { "description": "Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire!", "type": "string" },
+        "https-cert-file": { "description": "Filepath to SSL certificate. If also https-key-file is set use HTTPS using those certificates.", "type": "string" },
+        "https-key-file": { "description": "Filepath to SSL key file. If also https-cert-file is set use HTTPS using those certificates.", "type": "string" },
+        "redirect-http-to": { "description": "If not the empty string and addr does not end in :80, redirect every request incoming at port 80 to that url.", "type": "string" },
+        "stop-jobs-exceeding-walltime": { "description": "If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job.", "type": "integer" },
+        "short-running-jobs-duration": { "description": "Do not show running jobs shorter than X seconds.", "type": "integer" },
+        "emission-constant": { "description": ".", "type": "integer" },
+        "cron-frequency": {
+            "description": "Frequency of cron job workers.",
+            "type": "object",
+            "properties": {
+                "duration-worker": { "description": "Duration Update Worker [Defaults to '5m']", "type": "string" },
+                "footprint-worker": { "description": "Metric-Footprint Update Worker [Defaults to '10m']", "type": "string" }
+            }
+        },
+        "enable-resampling": {
+            "description": "Enable dynamic zoom in frontend metric plots.",
+            "type": "object",
+            "properties": {
+                "trigger": { "description": "Trigger next zoom level at less than this many visible datapoints.", "type": "integer" },
+                "resolutions": { "description": "Array of resampling target resolutions, in seconds.", "type": "array", "items": { "type": "integer" } }
+            },
+            "required": ["trigger", "resolutions"]
+        },
+        "jwts": {
+            "description": "For JWT token authentication.",
+            "type": "object",
+            "properties": {
+                "max-age": { "description": "Configure how long a token is valid. As string parsable by time.ParseDuration()", "type": "string" },
+                "cookieName": { "description": "Cookie that should be checked for a JWT token.", "type": "string" },
+                "validateUser": { "description": "Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.", "type": "boolean" },
+                "trustedIssuer": { "description": "Issuer that should be accepted when validating external JWTs", "type": "string" },
+                "syncUserOnLogin": { "description": "Add non-existent user to DB at login attempt with values provided in JWT.", "type": "boolean" }
+            },
+            "required": ["max-age"]
+        },
+        "oidc": {
+            "provider": { "description": "", "type": "string" },
+            "syncUserOnLogin": { "description": "", "type": "boolean" },
+            "updateUserOnLogin": { "description": "", "type": "boolean" },
+            "required": ["provider"]
+        },
+        "ldap": {
+            "description": "For LDAP Authentication and user synchronisation.",
+            "type": "object",
+            "properties": {
+                "url": { "description": "URL of LDAP directory server.", "type": "string" },
+                "user_base": { "description": "Base DN of user tree root.", "type": "string" },
+                "search_dn": { "description": "DN for authenticating LDAP admin account with general read rights.", "type": "string" },
+                "user_bind": { "description": "Expression used to authenticate users via LDAP bind. Must contain uid={username}.", "type": "string" },
+                "user_filter": { "description": "Filter to extract users for syncing.", "type": "string" },
+                "username_attr": { "description": "Attribute with full username. Default: gecos", "type": "string" },
+                "sync_interval": { "description": "Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.", "type": "string" },
+                "sync_del_old_users": { "description": "Delete obsolete users in database.", "type": "boolean" },
+                "syncUserOnLogin": { "description": "Add non-existent user to DB at login attempt if user exists in LDAP directory", "type": "boolean" }
+            },
+            "required": ["url", "user_base", "search_dn", "user_bind", "user_filter"]
+        },
+        "clusters": {
+            "description": "Configuration for the clusters to be displayed.",
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": { "description": "The name of the cluster.", "type": "string" },
+                    "metricDataRepository": {
+                        "description": "Type of the metric data repository for this cluster",
+                        "type": "object",
+                        "properties": {
+                            "kind": { "type": "string", "enum": ["influxdb", "prometheus", "cc-metric-store", "test"] },
+                            "url": { "type": "string" },
+                            "token": { "type": "string" }
+                        },
+                        "required": ["kind", "url"]
+                    },
+                    "filterRanges": {
+                        "description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
+                        "type": "object",
+                        "properties": {
+                            "numNodes": { "description": "UI slider range for number of nodes", "type": "object", "properties": { "from": { "type": "integer" }, "to": { "type": "integer" } }, "required": ["from", "to"] },
+                            "duration": { "description": "UI slider range for duration", "type": "object", "properties": { "from": { "type": "integer" }, "to": { "type": "integer" } }, "required": ["from", "to"] },
+                            "startTime": { "description": "UI slider range for start time", "type": "object", "properties": { "from": { "type": "string", "format": "date-time" }, "to": { "type": "null" } }, "required": ["from", "to"] }
+                        },
+                        "required": ["numNodes", "duration", "startTime"]
+                    }
+                },
+                "required": ["name", "metricDataRepository", "filterRanges"]
+            },
+            "minItems": 1
+        },
+        "ui-defaults": {
+            "description": "Default configuration for web UI",
+            "type": "object",
+            "properties": {
+                "plot_general_colorBackground": { "description": "Color plot background according to job average threshold limits", "type": "boolean" },
+                "plot_general_lineWidth": { "description": "Initial linewidth", "type": "integer" },
+                "plot_list_jobsPerPage": { "description": "Jobs shown per page in job lists", "type": "integer" },
+                "plot_view_plotsPerRow": { "description": "Number of plots per row in single job view", "type": "integer" },
+                "plot_view_showPolarplot": { "description": "Option to toggle polar plot in single job view", "type": "boolean" },
+                "plot_view_showRoofline": { "description": "Option to toggle roofline plot in single job view", "type": "boolean" },
+                "plot_view_showStatTable": { "description": "Option to toggle the node statistic table in single job view", "type": "boolean" },
+                "system_view_selectedMetric": { "description": "Initial metric shown in system view", "type": "string" },
+                "job_view_showFootprint": { "description": "Option to toggle footprint ui in single job view", "type": "boolean" },
+                "job_list_usePaging": { "description": "Option to switch from continuous scroll to paging", "type": "boolean" },
+                "analysis_view_histogramMetrics": { "description": "Metrics to show as job count histograms in analysis view", "type": "array", "items": { "type": "string", "minItems": 1 } },
+                "analysis_view_scatterPlotMetrics": { "description": "Initial scatter plot configuration in analysis view", "type": "array", "items": { "type": "array", "items": { "type": "string", "minItems": 2, "maxItems": 2 }, "minItems": 1 } },
+                "job_view_nodestats_selectedMetrics": { "description": "Initial metrics shown in node statistics table of single job view", "type": "array", "items": { "type": "string", "minItems": 1 } },
+                "job_view_selectedMetrics": { "description": "Initial metrics shown as plots in single job view", "type": "array", "items": { "type": "string", "minItems": 1 } },
+                "plot_general_colorscheme": { "description": "Initial color scheme", "type": "array", "items": { "type": "string", "minItems": 1 } },
+                "plot_list_selectedMetrics": { "description": "Initial metric plots shown in jobs lists", "type": "array", "items": { "type": "string", "minItems": 1 } }
+            },
+            "required": ["plot_general_colorBackground", "plot_general_lineWidth", "plot_list_jobsPerPage", "plot_view_plotsPerRow", "plot_view_showPolarplot", "plot_view_showRoofline", "plot_view_showStatTable", "system_view_selectedMetric", "job_view_showFootprint", "job_list_usePaging", "analysis_view_histogramMetrics", "analysis_view_scatterPlotMetrics", "job_view_nodestats_selectedMetrics", "job_view_selectedMetrics", "plot_general_colorscheme", "plot_list_selectedMetrics"]
+        }
+    },
+    "required": ["jwts", "clusters", "apiAllowedIPs"]
 }
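
Editor's sketch: likewise, a minimal, hypothetical config.json that satisfies the new top-level required keys (jwts, clusters, apiAllowedIPs) and the per-cluster required keys. Every value is illustrative.

{
    "addr": "localhost:8080",
    "apiAllowedIPs": ["*"],
    "jwts": { "max-age": "2000h" },
    "clusters": [
        {
            "name": "testcluster",
            "metricDataRepository": {
                "kind": "cc-metric-store",
                "url": "http://localhost:8082"
            },
            "filterRanges": {
                "numNodes": { "from": 1, "to": 64 },
                "duration": { "from": 0, "to": 86400 },
                "startTime": { "from": "2023-01-01T00:00:00Z", "to": null }
            }
        }
    ]
}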
Some files were not shown because too many files have changed in this diff.