5 Commits

295 changed files with 21211 additions and 37462 deletions

331
.github/workflows/Release.yml vendored Normal file
View File

@@ -0,0 +1,331 @@
# See: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions
# Workflow name
name: Release
# Run on tag push
on:
push:
tags:
- '**'
jobs:
#
# Build on AlmaLinux 8.5 using golang-1.18.2
#
AlmaLinux-RPM-build:
  runs-on: ubuntu-latest
  # See: https://hub.docker.com/_/almalinux
  container: almalinux:8.5
  # Job outputs forward the outputs of the 'rpmrename' step.
  # Only job outputs can be consumed by dependent jobs.
  outputs:
    rpm: ${{ steps.rpmrename.outputs.RPM }}
    srpm: ${{ steps.rpmrename.outputs.SRPM }}
  steps:
    # Use dnf to install development packages
    - name: Install development packages
      run: |
        dnf --assumeyes group install "Development Tools" "RPM Development Tools"
        dnf --assumeyes install wget openssl-devel diffutils delve which npm
        dnf --assumeyes install 'dnf-command(builddep)'

    # Checkout git repository and submodules.
    # fetch-depth must be 0 so that 'git describe' can see the tags.
    # See: https://github.com/marketplace/actions/checkout
    - name: Checkout
      uses: actions/checkout@v2
      with:
        submodules: recursive
        fetch-depth: 0

    # Install the pinned golang 1.18.2 RPMs and the frontend build tools.
    # NOTE(review): the packages are fetched over plain HTTP from a pinned
    # CentOS Stream 8 path; confirm the mirror still serves these files.
    - name: Install build dependencies
      run: |
        wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
        rpm -i go*.rpm
        npm install --global yarn rollup svelte rollup-plugin-svelte
        #dnf --assumeyes builddep build/package/cc-backend.spec

    # 'make RPM' reports the paths of the built packages as the step
    # outputs RPM and SRPM.
    - name: RPM build ClusterCockpit
      id: rpmbuild
      run: make RPM

    # AlmaLinux 8.5 is a derivative of Red Hat Enterprise Linux 8 (UBI 8), so
    # the file names of both created RPMs contain the substring 'el8'.
    # This step replaces 'el8' with 'alma85'. It uses 'mv' because it is
    # unclear whether the default AlmaLinux 8.5 container ships the 'rename'
    # command. This way the new names are also available as step outputs.
    - name: Rename RPMs (s/el8/alma85/)
      id: rpmrename
      run: |
        OLD_RPM="${{ steps.rpmbuild.outputs.RPM }}"
        OLD_SRPM="${{ steps.rpmbuild.outputs.SRPM }}"
        NEW_RPM="${OLD_RPM/el8/alma85}"
        NEW_SRPM="${OLD_SRPM/el8/alma85}"
        mv "${OLD_RPM}" "${NEW_RPM}"
        mv "${OLD_SRPM}" "${NEW_SRPM}"
        # Step outputs are written to the GITHUB_OUTPUT file; the
        # '::set-output' workflow command is deprecated.
        echo "SRPM=${NEW_SRPM}" >> "${GITHUB_OUTPUT}"
        echo "RPM=${NEW_RPM}" >> "${GITHUB_OUTPUT}"

    # See: https://github.com/actions/upload-artifact
    # NOTE(review): upload-artifact@v2 is reported as retired by GitHub;
    # confirm, and if upgrading, bump the matching download-artifact steps
    # of the Release job in the same change.
    - name: Save RPM as artifact
      uses: actions/upload-artifact@v2
      with:
        name: cc-backend RPM for AlmaLinux 8.5
        path: ${{ steps.rpmrename.outputs.RPM }}
    - name: Save SRPM as artifact
      uses: actions/upload-artifact@v2
      with:
        name: cc-backend SRPM for AlmaLinux 8.5
        path: ${{ steps.rpmrename.outputs.SRPM }}
#
# Build on UBI 8 using golang-1.18.2
#
UBI-8-RPM-build:
  runs-on: ubuntu-latest
  # Image reference:
  # https://catalog.redhat.com/software/containers/ubi8/ubi/5c359854d70cc534b3a3784e?container-tabs=gti
  container: registry.access.redhat.com/ubi8/ubi:8.5-226.1645809065
  # No rename is needed on UBI 8, so the job outputs forward the outputs of
  # the 'rpmbuild' step directly.
  outputs:
    rpm: ${{ steps.rpmbuild.outputs.RPM }}
    srpm: ${{ steps.rpmbuild.outputs.SRPM }}
  steps:
    # Toolchain and RPM tooling, installed with dnf; the subscription-manager
    # plugin is disabled for every dnf call in this job.
    - name: Install development packages
      run: >-
        dnf --assumeyes --disableplugin=subscription-manager install
        rpm-build go-srpm-macros rpm-build-libs rpm-libs gcc make python38
        git wget openssl-devel diffutils delve which

    # Full-history checkout (fetch-depth 0) including submodules, so that
    # 'git describe' works during the build.
    # See: https://github.com/marketplace/actions/checkout
    - name: Checkout
      uses: actions/checkout@v2
      with:
        submodules: recursive
        fetch-depth: 0

    # Pinned golang 1.18.2 RPMs, then npm and the frontend build tools.
    - name: Install build dependencies
      run: |
        wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
            http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
        rpm -i go*.rpm
        dnf --assumeyes --disableplugin=subscription-manager install npm
        npm install --global yarn rollup svelte rollup-plugin-svelte
        #dnf --assumeyes builddep build/package/cc-backend.spec

    # The Makefile target publishes the package paths as the step outputs
    # RPM and SRPM.
    - name: RPM build ClusterCockpit
      id: rpmbuild
      run: make RPM

    # Publish both packages as workflow artifacts.
    # See: https://github.com/actions/upload-artifact
    - name: Save RPM as artifact
      uses: actions/upload-artifact@v2
      with:
        name: cc-backend RPM for UBI 8
        path: ${{ steps.rpmbuild.outputs.RPM }}
    - name: Save SRPM as artifact
      uses: actions/upload-artifact@v2
      with:
        name: cc-backend SRPM for UBI 8
        path: ${{ steps.rpmbuild.outputs.SRPM }}
#
# Build on Ubuntu 20.04 using official go 1.19.1 package
#
Ubuntu-focal-build:
  runs-on: ubuntu-latest
  container: ubuntu:20.04
  # Job outputs forward the outputs of the 'debrename' step.
  # Only job outputs can be consumed by dependent jobs.
  outputs:
    deb: ${{ steps.debrename.outputs.DEB }}
  steps:
    # Use apt to install development packages
    - name: Install development packages
      run: |
        apt update && apt --assume-yes upgrade
        apt --assume-yes install build-essential sed git wget bash
        apt --assume-yes install npm
        npm install --global yarn rollup svelte rollup-plugin-svelte

    # Checkout git repository and submodules.
    # fetch-depth must be 0 so that 'git describe' can see the tags.
    # See: https://github.com/marketplace/actions/checkout
    - name: Checkout
      uses: actions/checkout@v2
      with:
        submodules: recursive
        fetch-depth: 0

    # Use the official golang tarball. The PATH export only affects this
    # step; the build step below exports it again.
    - name: Install Golang
      run: |
        wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
        tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
        export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
        go version

    # 'make DEB' reports the name of the built package as the step output DEB.
    - name: DEB build ClusterCockpit
      id: dpkg-build
      run: |
        ls -la
        pwd
        env
        export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
        git config --global --add safe.directory "$(pwd)"
        make DEB

    # Insert the distribution tag in front of the '.deb' extension.
    - name: Rename DEB (add '_ubuntu20.04')
      id: debrename
      run: |
        OLD_DEB_FILE="${{ steps.dpkg-build.outputs.DEB }}"
        # '%.*' strips the last '.'-separated suffix (the '.deb' extension).
        NEW_DEB_FILE="${OLD_DEB_FILE%.*}_ubuntu20.04.deb"
        mv "${OLD_DEB_FILE}" "${NEW_DEB_FILE}"
        # Step outputs are written to the GITHUB_OUTPUT file; the
        # '::set-output' workflow command is deprecated.
        echo "DEB=${NEW_DEB_FILE}" >> "${GITHUB_OUTPUT}"

    # See: https://github.com/actions/upload-artifact
    # NOTE(review): upload-artifact@v2 is reported as retired by GitHub;
    # confirm, and if upgrading, bump the matching download-artifact step
    # of the Release job in the same change.
    - name: Save DEB as artifact
      uses: actions/upload-artifact@v2
      with:
        name: cc-backend DEB for Ubuntu 20.04
        path: ${{ steps.debrename.outputs.DEB }}
#
# Build on Ubuntu 22.04 using official go 1.19.1 package
#
Ubuntu-jammy-build:
  runs-on: ubuntu-latest
  container: ubuntu:22.04
  # Job outputs forward the outputs of the 'debrename' step.
  # Only job outputs can be consumed by dependent jobs.
  outputs:
    deb: ${{ steps.debrename.outputs.DEB }}
  steps:
    # Use apt to install development packages
    - name: Install development packages
      run: |
        apt update && apt --assume-yes upgrade
        apt --assume-yes install build-essential sed git wget bash npm
        npm install --global yarn rollup svelte rollup-plugin-svelte

    # Checkout git repository and submodules.
    # fetch-depth must be 0 so that 'git describe' can see the tags.
    # See: https://github.com/marketplace/actions/checkout
    - name: Checkout
      uses: actions/checkout@v2
      with:
        submodules: recursive
        fetch-depth: 0

    # Use the official golang tarball. The PATH export only affects this
    # step; the build step below exports it again.
    - name: Install Golang
      run: |
        wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
        tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
        export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
        go version

    # 'make DEB' reports the name of the built package as the step output DEB.
    - name: DEB build ClusterCockpit
      id: dpkg-build
      run: |
        ls -la
        pwd
        env
        export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
        git config --global --add safe.directory "$(pwd)"
        make DEB

    # Insert the distribution tag in front of the '.deb' extension.
    - name: Rename DEB (add '_ubuntu22.04')
      id: debrename
      run: |
        OLD_DEB_FILE="${{ steps.dpkg-build.outputs.DEB }}"
        # '%.*' strips the last '.'-separated suffix (the '.deb' extension).
        NEW_DEB_FILE="${OLD_DEB_FILE%.*}_ubuntu22.04.deb"
        mv "${OLD_DEB_FILE}" "${NEW_DEB_FILE}"
        # Step outputs are written to the GITHUB_OUTPUT file; the
        # '::set-output' workflow command is deprecated.
        echo "DEB=${NEW_DEB_FILE}" >> "${GITHUB_OUTPUT}"

    # See: https://github.com/actions/upload-artifact
    # NOTE(review): upload-artifact@v2 is reported as retired by GitHub;
    # confirm, and if upgrading, bump the matching download-artifact step
    # of the Release job in the same change.
    - name: Save DEB as artifact
      uses: actions/upload-artifact@v2
      with:
        name: cc-backend DEB for Ubuntu 22.04
        path: ${{ steps.debrename.outputs.DEB }}
#
# Create release with fresh RPMs and DEBs
#
Release:
  runs-on: ubuntu-latest
  # The release needs the RPMs and DEBs, so depend on all build jobs
  needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build, Ubuntu-jammy-build]
  steps:
    # See: https://github.com/actions/download-artifact
    # The artifact names must match the names used by the upload steps of
    # the build jobs.
    - name: Download AlmaLinux 8.5 RPM
      uses: actions/download-artifact@v2
      with:
        name: cc-backend RPM for AlmaLinux 8.5
    - name: Download AlmaLinux 8.5 SRPM
      uses: actions/download-artifact@v2
      with:
        name: cc-backend SRPM for AlmaLinux 8.5
    - name: Download UBI 8 RPM
      uses: actions/download-artifact@v2
      with:
        name: cc-backend RPM for UBI 8
    - name: Download UBI 8 SRPM
      uses: actions/download-artifact@v2
      with:
        name: cc-backend SRPM for UBI 8
    - name: Download Ubuntu 20.04 DEB
      uses: actions/download-artifact@v2
      with:
        name: cc-backend DEB for Ubuntu 20.04
    - name: Download Ubuntu 22.04 DEB
      uses: actions/download-artifact@v2
      with:
        name: cc-backend DEB for Ubuntu 22.04

    # The download actions do not publish the name of the downloaded file,
    # so we re-use the job outputs of the parent jobs. The files are all
    # downloaded to the current folder, hence only the basename is kept.
    # The gh-release action afterwards does not accept file lists; every
    # file has to be listed under 'files'. This step therefore creates one
    # output per package (RPM and SRPM per RPM distro, one DEB per Ubuntu
    # release).
    - name: Set package variables
      id: files
      run: |
        ALMA_85_RPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.rpm }}")
        ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm }}")
        UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm }}")
        UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm }}")
        U_2004_DEB=$(basename "${{ needs.Ubuntu-focal-build.outputs.deb }}")
        U_2204_DEB=$(basename "${{ needs.Ubuntu-jammy-build.outputs.deb }}")
        # Log the resolved names for debugging
        echo "ALMA_85_RPM::${ALMA_85_RPM}"
        echo "ALMA_85_SRPM::${ALMA_85_SRPM}"
        echo "UBI_8_RPM::${UBI_8_RPM}"
        echo "UBI_8_SRPM::${UBI_8_SRPM}"
        echo "U_2004_DEB::${U_2004_DEB}"
        echo "U_2204_DEB::${U_2204_DEB}"
        # Step outputs are written to the GITHUB_OUTPUT file; the
        # '::set-output' workflow command is deprecated.
        {
          echo "ALMA_85_RPM=${ALMA_85_RPM}"
          echo "ALMA_85_SRPM=${ALMA_85_SRPM}"
          echo "UBI_8_RPM=${UBI_8_RPM}"
          echo "UBI_8_SRPM=${UBI_8_SRPM}"
          echo "U_2004_DEB=${U_2004_DEB}"
          echo "U_2204_DEB=${U_2204_DEB}"
        } >> "${GITHUB_OUTPUT}"

    # See: https://github.com/softprops/action-gh-release
    - name: Release
      uses: softprops/action-gh-release@v1
      if: startsWith(github.ref, 'refs/tags/')
      with:
        name: cc-backend-${{ github.ref_name }}
        files: |
          ${{ steps.files.outputs.ALMA_85_RPM }}
          ${{ steps.files.outputs.ALMA_85_SRPM }}
          ${{ steps.files.outputs.UBI_8_RPM }}
          ${{ steps.files.outputs.UBI_8_SRPM }}
          ${{ steps.files.outputs.U_2004_DEB }}
          ${{ steps.files.outputs.U_2204_DEB }}

View File

@@ -7,7 +7,7 @@ jobs:
- name: Install Go
uses: actions/setup-go@v4
with:
go-version: 1.24.x
go-version: 1.19.x
- name: Checkout code
uses: actions/checkout@v3
- name: Build, Vet & Test

20
.gitignore vendored
View File

@@ -1,23 +1,19 @@
/cc-backend
/var/job-archive
/var/*.db
/var/machine-state
/.env
/config.json
/var/job-archive
/var/machine-state
/var/job.db-shm
/var/job.db-wal
/var/*.db
/var/*.txt
/web/frontend/public/build
/web/frontend/node_modules
/.vscode/*
/archive-migration
/archive-manager
var/job.db-shm
var/job.db-wal
/internal/repository/testdata/job.db-shm
/internal/repository/testdata/job.db-wal
/.vscode/*
dist/
*.db

View File

@@ -34,6 +34,19 @@ builds:
main: ./tools/archive-manager
tags:
- static_build
- env:
- CGO_ENABLED=0
goos:
- linux
goarch:
- amd64
goamd64:
- v3
id: "archive-migration"
binary: archive-migration
main: ./tools/archive-migration
tags:
- static_build
- env:
- CGO_ENABLED=0
goos:
@@ -57,7 +70,7 @@ archives:
{{- else }}{{ .Arch }}{{ end }}
{{- if .Arm }}v{{ .Arm }}{{ end }}
checksum:
name_template: "checksums.txt"
name_template: 'checksums.txt'
snapshot:
name_template: "{{ incpatch .Version }}-next"
changelog:
@@ -87,7 +100,7 @@ changelog:
release:
draft: false
footer: |
Supports job archive version 2 and database version 8.
Supports job archive version 1 and database version 6.
Please check out the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for further details on breaking changes.
# vim: set ts=2 sw=2 tw=0 fo=cnqoj

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) NHR@FAU, University Erlangen-Nuremberg
Copyright (c) 2022 NHR@FAU, University Erlangen-Nuremberg
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

122
Makefile
View File

@@ -2,7 +2,7 @@ TARGET = ./cc-backend
VAR = ./var
CFG = config.json .env
FRONTEND = ./web/frontend
VERSION = 1.4.4
VERSION = 1.2.2
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
@@ -22,23 +22,13 @@ SVELTE_COMPONENTS = status \
header
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
$(wildcard $(FRONTEND)/src/*.js) \
$(wildcard $(FRONTEND)/src/analysis/*.svelte) \
$(wildcard $(FRONTEND)/src/config/*.svelte) \
$(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
$(wildcard $(FRONTEND)/src/config/user/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/*.js) \
$(wildcard $(FRONTEND)/src/generic/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
$(wildcard $(FRONTEND)/src/header/*.svelte) \
$(wildcard $(FRONTEND)/src/job/*.svelte)
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
$(wildcard $(FRONTEND)/src/*.js) \
$(wildcard $(FRONTEND)/src/filters/*.svelte) \
$(wildcard $(FRONTEND)/src/plots/*.svelte) \
$(wildcard $(FRONTEND)/src/joblist/*.svelte)
.PHONY: clean distclean test tags frontend swagger graphql $(TARGET)
.PHONY: clean distclean test tags frontend $(TARGET)
.NOTPARALLEL:
@@ -50,15 +40,6 @@ frontend:
$(info ===> BUILD frontend)
cd web/frontend && npm install && npm run build
swagger:
$(info ===> GENERATE swagger)
@go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api
@mv ./api/docs.go ./internal/api/docs.go
graphql:
$(info ===> GENERATE graphql)
@go run github.com/99designs/gqlgen
clean:
$(info ===> CLEAN)
@go clean
@@ -82,7 +63,7 @@ tags:
@ctags -R
$(VAR):
@mkdir -p $(VAR)
@mkdir $(VAR)
config.json:
$(info ===> Initialize config.json file)
@@ -95,3 +76,90 @@ config.json:
$(SVELTE_TARGETS): $(SVELTE_SRC)
$(info ===> BUILD frontend)
cd web/frontend && npm install && npm run build
# Stage the binary and the default configuration below $(PREFIX).
# PREFIX must be set by the caller (e.g. `make install PREFIX=/tmp/stage`);
# the target aborts with a message when it is empty.
# NOTE(review): BINDIR is not defined in the visible part of this Makefile;
# confirm it is set elsewhere or passed on the command line.
.PHONY: install
install: $(TARGET)
	@set -e
	@if [ -z "$(PREFIX)" ]; then echo "install: PREFIX must be set" >&2; exit 1; fi
	@mkdir --parents --verbose "$(PREFIX)/usr/$(BINDIR)"
	@install -Dpm 755 $(TARGET) "$(PREFIX)/usr/$(BINDIR)/$(TARGET)"
	@install -Dpm 600 configs/config.json "$(PREFIX)/etc/$(TARGET)/$(TARGET).json"
# Build the binary RPM and the source RPM from the current HEAD.
# The recipe runs in a single shell (.ONESHELL), so shell variables are
# shared between lines. 'set -e' makes the target fail as soon as one of the
# commands fails instead of only reporting the status of the last command.
# When run inside a GitHub workflow, the package paths are published as the
# step outputs RPM and SRPM.
.ONESHELL:
.PHONY: RPM
RPM: build/package/cc-backend.spec
	@set -e
	@WORKSPACE="$${PWD}"
	@SPECFILE="$${WORKSPACE}/build/package/cc-backend.spec"
	# Setup RPM build tree
	@eval $$(rpm --eval "ARCH='%{_arch}' RPMDIR='%{_rpmdir}' SOURCEDIR='%{_sourcedir}' SPECDIR='%{_specdir}' SRPMDIR='%{_srcrpmdir}' BUILDDIR='%{_builddir}'")
	@mkdir --parents --verbose "$${RPMDIR}" "$${SOURCEDIR}" "$${SPECDIR}" "$${SRPMDIR}" "$${BUILDDIR}"
	# Derive the version from the nearest tag: strip a leading 'v' and
	# replace '-' by '_'. Fall back to $(VERSION) if no tag can be described.
	@COMMITISH="HEAD"
	@VERS=$$(git describe --tags $${COMMITISH} || true)
	@VERS=$${VERS#v}
	@VERS=$$(echo "$${VERS}" | sed -e s+'-'+'_'+g)
	@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
	# Query name/version/release from the spec file, then create the source
	# tarball
	@eval $$(rpmspec --query --queryformat "NAME='%{name}' VERSION='%{version}' RELEASE='%{release}' NVR='%{NVR}' NVRA='%{NVRA}'" --define="VERS $${VERS}" "$${SPECFILE}")
	@PREFIX="$${NAME}-$${VERSION}"
	@FORMAT="tar.gz"
	@SRCFILE="$${SOURCEDIR}/$${PREFIX}.$${FORMAT}"
	@git archive --verbose --format "$${FORMAT}" --prefix="$${PREFIX}/" --output="$${SRCFILE}" $${COMMITISH}
	# Build RPM and SRPM
	@rpmbuild -ba --define="VERS $${VERS}" --rmsource --clean "$${SPECFILE}"
	# Report RPMs and SRPMs when in GitHub Workflow
	@if [ "$${GITHUB_ACTIONS}" = true ]; then
	@    RPMFILE="$${RPMDIR}/$${ARCH}/$${NVRA}.rpm"
	@    SRPMFILE="$${SRPMDIR}/$${NVR}.src.rpm"
	@    echo "RPM: $${RPMFILE}"
	@    echo "SRPM: $${SRPMFILE}"
	@    if [ -n "$${GITHUB_OUTPUT:-}" ]; then
	@        # Preferred mechanism: append name=value to the GITHUB_OUTPUT file
	@        echo "SRPM=$${SRPMFILE}" >> "$${GITHUB_OUTPUT}"
	@        echo "RPM=$${RPMFILE}" >> "$${GITHUB_OUTPUT}"
	@    else
	@        # Fallback for runners without GITHUB_OUTPUT (deprecated command)
	@        echo "::set-output name=SRPM::$${SRPMFILE}"
	@        echo "::set-output name=RPM::$${RPMFILE}"
	@    fi
	@fi
# Build a Debian package of cc-backend in the current directory.
# The recipe runs in a single shell (.ONESHELL), so shell variables are
# shared between lines. 'set -e' makes the target fail as soon as one of the
# commands fails instead of only reporting the status of the last command.
# The payload is staged in ./.dpkgbuild, which is removed afterwards.
# When run inside a GitHub workflow, the package file name is published as
# the step output DEB.
.ONESHELL:
.PHONY: DEB
DEB: build/package/cc-backend.deb.control
	@set -e
	@BASEDIR="$${PWD}"
	@WORKSPACE="$${PWD}/.dpkgbuild"
	@DEBIANBINDIR="$${WORKSPACE}/DEBIAN"
	@CONTROLFILE="$${BASEDIR}/build/package/cc-backend.deb.control"
	@mkdir --parents --verbose "$${WORKSPACE}" "$${DEBIANBINDIR}"
	# Derive the version from the nearest tag: strip a leading 'v' and
	# replace '-' by '_'. Fall back to $(VERSION) if no tag can be described.
	@COMMITISH="HEAD"
	@VERS=$$(git describe --tags --abbrev=0 $${COMMITISH} || true)
	@VERS=$${VERS#v}
	@VERS=$$(echo "$${VERS}" | sed -e s+'-'+'_'+g)
	@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
	# Map the machine name to the Debian architecture name (x86_64 -> amd64)
	@ARCH=$$(uname -m)
	@ARCH=$$(echo "$${ARCH}" | sed -e s+'_'+'-'+g)
	@if [ "$${ARCH}" = "x86-64" ]; then ARCH=amd64; fi
	# Build frontend and backend. The frontend build runs in a subshell so
	# that a failure aborts the recipe and the working directory is unchanged.
	@(cd web/frontend && yarn install && yarn build)
	@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
	# Stage the package payload
	@mkdir --parents --verbose "$${WORKSPACE}"/$(VAR)
	@touch "$${WORKSPACE}"/$(VAR)/job.db
	@mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR)
	@cp $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
	@chmod 0755 $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
	@mkdir --parents --verbose $${WORKSPACE}/etc/$(TARGET)
	@cp configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
	@chmod 0600 $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
	@mkdir --parents --verbose $${WORKSPACE}/usr/lib/systemd/system
	@cp build/package/$(TARGET).service $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
	@chmod 0644 $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
	@mkdir --parents --verbose $${WORKSPACE}/etc/default
	@cp build/package/$(TARGET).config $${WORKSPACE}/etc/default/$(TARGET)
	@chmod 0600 $${WORKSPACE}/etc/default/$(TARGET)
	@mkdir --parents --verbose $${WORKSPACE}/usr/lib/sysusers.d
	@cp build/package/$(TARGET).sysusers $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
	@chmod 0644 $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
	# Fill in the control file. The installed size (in KiB) is measured after
	# staging so that it reflects the payload; the DEBIAN control directory
	# itself is not counted.
	@SIZE=$$(du --summarize --apparent-size --block-size=1024 --exclude=DEBIAN "$${WORKSPACE}" | awk '{print $$1}')
	@sed -e s+"{VERSION}"+"$${VERS}"+g -e s+"{INSTALLED_SIZE}"+"$${SIZE}"+g -e s+"{ARCH}"+"$${ARCH}"+g "$${CONTROLFILE}" > "$${DEBIANBINDIR}/control"
	# Build the package and clean up the staging directory
	@DEB_FILE="cc-backend_$${VERS}_$${ARCH}.deb"
	@dpkg-deb -b "$${WORKSPACE}" "$${DEB_FILE}"
	@rm -r "$${WORKSPACE}"
	# Report the DEB when in GitHub Workflow
	@if [ "$${GITHUB_ACTIONS}" = "true" ]; then
	@    if [ -n "$${GITHUB_OUTPUT:-}" ]; then
	@        # Preferred mechanism: append name=value to the GITHUB_OUTPUT file
	@        echo "DEB=$${DEB_FILE}" >> "$${GITHUB_OUTPUT}"
	@    else
	@        # Fallback for runners without GITHUB_OUTPUT (deprecated command)
	@        echo "::set-output name=DEB::$${DEB_FILE}"
	@    fi
	@fi

200
README.md
View File

@@ -1,16 +1,12 @@
# NOTE
Please have a look at the [Release
Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md)
for breaking changes!
Please have a look at the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for breaking changes!
# ClusterCockpit REST and GraphQL API backend
[![Build](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml/badge.svg)](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml)
This is a Golang backend implementation for a REST and GraphQL API according to
the [ClusterCockpit
specifications](https://github.com/ClusterCockpit/cc-specifications). It also
the [ClusterCockpit specifications](https://github.com/ClusterCockpit/cc-specifications). It also
includes a web interface for ClusterCockpit. This implementation replaces the
previous PHP Symfony based ClusterCockpit web interface. The reasons for
switching from PHP Symfony to a Golang based solution are explained
@@ -18,31 +14,31 @@ switching from PHP Symfony to a Golang based solution are explained
## Overview
This is a Golang web backend for the ClusterCockpit job-specific performance
monitoring framework. It provides a REST API for integrating ClusterCockpit with
an HPC cluster batch system and external analysis scripts. Data exchange between
the web front-end and the back-end is based on a GraphQL API. The web frontend
is also served by the backend using [Svelte](https://svelte.dev/) components.
Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using
[Bootstrap Icons](https://icons.getbootstrap.com/).
The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by
default. Optionally it can use a MySQL/MariaDB database server. While there are
metric data backends for the InfluxDB and Prometheus time series databases, the
only tested and supported setup is to use cc-metric-store as the metric data
backend. Documentation on how to integrate ClusterCockpit with other time series
databases will be added in the future.
This is a Golang web backend for the ClusterCockpit job-specific performance monitoring framework.
It provides a REST API for integrating ClusterCockpit with an HPC cluster batch system and external analysis scripts.
Data exchange between the web front-end and the back-end is based on a GraphQL API.
The web frontend is also served by the backend using [Svelte](https://svelte.dev/) components.
Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using [Bootstrap Icons](https://icons.getbootstrap.com/).
The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by default.
Optionally it can use a MySQL/MariaDB database server.
While there are metric data backends for the InfluxDB and Prometheus time series databases, the only tested and supported setup is to use cc-metric-store as the metric data backend.
Documentation on how to integrate ClusterCockpit with other time series databases will be added in the future.
Completed batch jobs are stored in a file-based job archive according to
[this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
[this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
The backend supports authentication via local accounts, an external LDAP
directory, and JWT tokens. Authorization for APIs is implemented with
[JWT](https://jwt.io/) tokens created with public/private key encryption.
You find a detailed documentation on the [ClusterCockpit
Webpage](https://clustercockpit.org).
You find more detailed information here:
* `./configs/README.md`: Infos about configuration and setup of cc-backend.
* `./init/README.md`: Infos on how to setup cc-backend as systemd service on Linux.
* `./tools/README.md`: Infos on the JWT authorization token workflows in ClusterCockpit.
* `./docs`: You can find further documentation here. There is also a Hands-on tutorial that is recommended to get familiar with the ClusterCockpit setup.
## Build requirements
**NOTE**
ClusterCockpit requires a current version of the golang toolchain and node.js.
You can check `go.mod` to see what is the current minimal golang version needed.
@@ -53,7 +49,7 @@ on the Go standard library, it is crucial for security and performance to use a
current version of golang. In addition, an old golang toolchain may limit the supported
versions of third-party packages.
## How to try ClusterCockpit with a demo setup
## How to try ClusterCockpit with a demo setup.
We provide a shell script that downloads demo data and automatically starts the
cc-backend. You will need `wget`, `go`, `node`, `npm` in your path to
@@ -65,37 +61,31 @@ cd ./cc-backend
./startDemo.sh
```
You can also try the demo using the latest release binary.
You can also try the demo using the latest release binary.
Create a folder and put the release binary `cc-backend` into this folder.
Execute the following steps:
``` shell
./cc-backend -init
vim config.json (Add a second cluster entry and name the clusters alex and fritz)
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
tar xf job-archive-demo.tar
./cc-backend -init-db -add-user demo:admin:demo -loglevel info
./cc-backend -server -dev -loglevel info
```
$ ./cc-backend -init
$ vim config.json (Add a second cluster entry and name the clusters alex and fritz)
$ wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
$ tar xf job-archive-demo.tar
$ ./cc-backend -init-db -add-user demo:admin:demo -loglevel info
$ ./cc-backend -server -dev -loglevel info
```
You can access the web interface at [http://localhost:8080](http://localhost:8080).
You can access the web interface at http://localhost:8080.
Credentials for login are `demo:demo`.
Please note that some views do not work without a metric backend (e.g., the
Analysis, Systems and Status views).
## How to build and run
## How to build and run
There is a Makefile to automate the build of cc-backend. The Makefile supports
the following targets:
* `make`: Initialize `var` directory and build svelte frontend and backend
binary. Note that there is no proper prerequisite handling. Any change of
frontend source files will result in a complete rebuild.
* `make clean`: Clean go build cache and remove binary.
* `make test`: Run the tests that are also run in the GitHub workflow setup.
There is a Makefile to automate the build of cc-backend. The Makefile supports the following targets:
* `$ make`: Initialize `var` directory and build svelte frontend and backend binary. Note that there is no proper prerequisite handling. Any change of frontend source files will result in a complete rebuild.
* `$ make clean`: Clean go build cache and remove binary.
* `$ make test`: Run the tests that are also run in the GitHub workflow setup.
A common workflow for setting up cc-backend from scratch is:
```sh
git clone https://github.com/ClusterCockpit/cc-backend.git
@@ -126,43 +116,89 @@ ln -s <your-existing-job-archive> ./var/job-archive
./cc-backend -help
```
### Run as systemd daemon
To run this program as a daemon, cc-backend comes with an [example systemd setup](./init/README.md).
## Configuration and setup
cc-backend can be used as a local web interface for an existing job archive or
as a server for the ClusterCockpit monitoring framework.
Create your job archive according to [this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
At least one cluster directory with a valid `cluster.json` file is required. If
you configure the job archive from scratch, you must also create the job
archive version file that contains the job archive version as an integer.
You can retrieve the currently supported version by running the following
command:
```
$ ./cc-backend -version
```
It is ok to have no jobs in the job archive.
### Configuration
A configuration file in JSON format must be specified with `-config` to override the default settings.
By default, a `config.json` file located in the current directory of the `cc-backend` process will be loaded even without the `-config` flag.
Documentation of all supported configuration and command line options can be found [here](./configs/README.md).
## Database initialization and migration
Each `cc-backend` version supports a specific database version.
At startup, the version of the sqlite database is checked and `cc-backend` terminates if the version does not match.
`cc-backend` supports the migration of the database schema to the required version with the command line option `-migrate-db`.
If the database file does not exist yet, it will be created and initialized with the command line option `-migrate-db`.
If you want to use a newer database version with an older version of cc-backend, you can downgrade a database with the external tool [migrate](https://github.com/golang-migrate/migrate).
In this case, you must specify the path to the migration files in a current source tree: `./internal/repository/migrations/`.
## Development and testing
When making changes to the REST or GraphQL API, the appropriate code generators must be used.
You must always rebuild `cc-backend` after updating the API files.
### Update GraphQL schema
This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL API.
The schema can be found in `./api/schema.graphqls`.
After changing it, you need to run `go run github.com/99designs/gqlgen`, which will update `./internal/graph/model`.
If new resolvers are needed, they will be added to `./internal/graph/schema.resolvers.go`, where you will then need to implement them.
If you start `cc-backend` with the `-dev` flag, the GraphQL Playground UI is available at http://localhost:8080/playground.
### Update Swagger UI
This project integrates [swagger ui](https://swagger.io/tools/swagger-ui/) to document and test its REST API.
The swagger documentation files can be found in `./api/`.
You can generate the swagger-ui configuration by running `go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api`.
You need to move the created `./api/docs.go` to `./internal/api/docs.go`.
If you start cc-backend with the `-dev` flag, the Swagger interface is available
at http://localhost:8080/swagger/.
You must enter a JWT key for a user with the API role.
**NOTE**
The user who owns the JWT key must not be logged into the same browser (have a
running session), or the Swagger requests will not work. It is recommended to
create a separate user that has only the API role.
## Development and testing
In case the REST or GraphQL API is changed the according code generators have to be used.
## Project file structure
* [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api)
contains the API schema files for the REST and GraphQL APIs. The REST API is
documented in the OpenAPI 3.0 format in
[./api/openapi.yaml](./api/openapi.yaml).
* [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend)
contains `main.go` for the main application.
* [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs)
contains documentation about configuration and command line options and required
environment variables. A sample configuration file is provided.
* [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs)
contains more in-depth documentation.
* [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init)
contains an example of setting up systemd for production use.
* [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal)
contains library source code that is not intended for use by others.
* [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg)
contains Go packages that can be used by other projects.
* [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools)
Additional command line helper tools.
* [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
Commands for getting infos about an existing job archive.
* [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
Tool to convert external pubkey for use in `cc-backend`.
* [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
contains a small application to generate a compatible JWT keypair. You find
documentation on how to use it
[here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
* [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web)
Server-side templates and frontend-related files:
* [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend)
Svelte components and static assets for the frontend UI
* [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates)
Server-side Go templates
* [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml)
Configures the behaviour and generation of
[gqlgen](https://github.com/99designs/gqlgen).
* [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh)
is a shell script that sets up demo data, and builds and starts `cc-backend`.
- [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api) contains the API schema files for the REST and GraphQL APIs. The REST API is documented in the OpenAPI 3.0 format in [./api/openapi.yaml](./api/openapi.yaml).
- [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend) contains `main.go` for the main application.
- [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs) contains documentation about configuration and command line options and required environment variables. A sample configuration file is provided.
- [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs) contains more in-depth documentation.
- [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init) contains an example of setting up systemd for production use.
- [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal) contains library source code that is not intended for use by others.
- [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg) contains Go packages that can be used by other projects.
- [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools) Additional command line helper tools.
- [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager) Commands for getting infos about an existing job archive.
- [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration) Tool to migrate from previous to current job archive version.
- [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey) Tool to convert external pubkey for use in `cc-backend`.
- [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair) contains a small application to generate a compatible JWT keypair. You find documentation on how to use it [here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
- [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web) Server-side templates and frontend-related files:
- [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend) Svelte components and static assets for the frontend UI
- [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates) Server-side Go templates
- [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml) Configures the behaviour and generation of [gqlgen](https://github.com/99designs/gqlgen).
- [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh) is a shell script that sets up demo data, and builds and starts `cc-backend`.

View File

@@ -1,47 +1,40 @@
# `cc-backend` version 1.4.4
# `cc-backend` version 1.2.2
Supports job archive version 2 and database version 8.
Supports job archive version 1 and database version 6.
This is a bug fix release of `cc-backend`, the API backend and frontend
This is a minor release of `cc-backend`, the API backend and frontend
implementation of ClusterCockpit.
For release-specific notes visit the [ClusterCockpit Documentation](https://clustercockpit.org/docs/release/).
## Breaking changes
** Breaking changes **
The option `apiAllowedIPs` is now a required configuration attribute in
`config.json`. This option restricts access to the admin API.
* The LDAP configuration option `user_filter` was changed and now should not include
the uid wildcard. Example:
- Old: `"user_filter": "(&(objectclass=posixAccount)(uid=*))"`
- New: `"user_filter": "(&(objectclass=posixAccount))"`
To retain the previous behavior, in which the API is accessible from
everywhere by default, set:
* The aggregate job statistic core hours is now computed using the job table
column `num_hwthreads`. In a future release this column will be renamed to
`num_cores`. For correct display of core hours `num_hwthreads` must be correctly
filled on job start. If your existing jobs do not provide the correct value in
this column then you can set this with one SQL INSERT statement. This only applies
if you have exclusive jobs only. Please be aware that we treat this column as
if it were the number of cores. In case you have SMT enabled and `num_hwthreads`
is not the number of cores, the core hours will be too high by a factor!
```json
"apiAllowedIPs": [
"*"
]
* The `jwts` key is now mandatory in `config.json`. It must set `max-age` to
define the token validity. Some key names have changed, please refer to the
[config documentation](./configs/README.md) for details.
* The following API endpoints are only accessible from IPs registered using the apiAllowedIPs configuration option:
- `/users/` [GET, POST, DELETE]
- `/user/{id}` [POST]
** NOTE **
If you are using the sqlite3 backend the `PRAGMA` option `foreign_keys` must be
explicitly set to ON. If using the sqlite3 console it is per default set to
OFF! On every console session you must set:
```
sqlite> PRAGMA foreign_keys = ON;
## Breaking changes for minor release 1.4.x
- You need to perform a database migration. Depending on your database size the
migration might require several hours!
- You need to adapt the `cluster.json` configuration files in the job-archive,
add new required attributes to the metric list and after that edit
`./job-archive/version.txt` to version 2. Only metrics that have the footprint
attribute set can be filtered and show up in the footprint UI and polar plot.
- Continuous scrolling is default now in all job lists. You can change this back
to paging globally, also every user can configure to use paging or continuous
scrolling individually.
- Tags have a scope now. Existing tags will get global scope in the database
migration.
## New features
- Tags can now be deleted from the web interface
## Known issues
- Currently energy footprint metrics of type energy are ignored for calculating
total energy.
- Resampling for running jobs only works with cc-metric-store
- With energy footprint metrics of type power the unit is ignored and it is
assumed the metric has the unit Watt.
```
Otherwise if you delete jobs the jobtag relation table will not be updated accordingly!

View File

@@ -18,7 +18,6 @@ type Job {
numNodes: Int!
numHWThreads: Int!
numAcc: Int!
energy: Float!
SMT: Int!
exclusive: Int!
partition: String!
@@ -28,8 +27,12 @@ type Job {
tags: [Tag!]!
resources: [Resource!]!
concurrentJobs: JobLinkResultList
footprint: [FootprintValue]
energyFootprint: [EnergyFootprintValue]
memUsedMax: Float
flopsAnyAvg: Float
memBwAvg: Float
loadAvg: Float
metaData: Any
userData: User
}
@@ -42,6 +45,7 @@ type JobLink {
type Cluster {
name: String!
partitions: [String!]! # Slurm partitions
metricConfig: [MetricConfig!]!
subClusters: [SubCluster!]! # Hardware partitions/subclusters
}
@@ -57,24 +61,9 @@ type SubCluster {
flopRateSimd: MetricValue!
memoryBandwidth: MetricValue!
topology: Topology!
metricConfig: [MetricConfig!]!
footprint: [String!]!
}
type FootprintValue {
name: String!
stat: String!
value: Float!
}
type EnergyFootprintValue {
hardware: String!
metric: String!
value: Float!
}
type MetricValue {
name: String
unit: Unit!
value: Float!
}
@@ -113,7 +102,6 @@ type MetricConfig {
normal: Float
caution: Float!
alert: Float!
lowerIsBetter: Boolean
subClusters: [SubClusterConfig!]!
}
@@ -121,7 +109,6 @@ type Tag {
id: ID!
type: String!
name: String!
scope: String!
}
type Resource {
@@ -151,30 +138,6 @@ type Series {
data: [NullableFloat!]!
}
type StatsSeries {
mean: [NullableFloat!]!
median: [NullableFloat!]!
min: [NullableFloat!]!
max: [NullableFloat!]!
}
type JobStatsWithScope {
name: String!
scope: MetricScope!
stats: [ScopedStats!]!
}
type ScopedStats {
hostname: String!
id: String
data: MetricStatistics!
}
type JobStats {
name: String!
stats: MetricStatistics!
}
type Unit {
base: String!
prefix: String
@@ -186,6 +149,12 @@ type MetricStatistics {
max: Float!
}
type StatsSeries {
mean: [NullableFloat!]!
min: [NullableFloat!]!
max: [NullableFloat!]!
}
type MetricFootprints {
metric: String!
data: [NullableFloat!]!
@@ -211,28 +180,6 @@ type NodeMetrics {
metrics: [JobMetricWithName!]!
}
type NodesResultList {
items: [NodeMetrics!]!
offset: Int
limit: Int
count: Int
totalNodes: Int
hasNextPage: Boolean
}
type ClusterSupport {
cluster: String!
subClusters: [String!]!
}
type GlobalMetricListItem {
name: String!
unit: Unit!
scope: MetricScope!
footprint: String
availability: [ClusterSupport!]!
}
type Count {
name: String!
count: Int!
@@ -244,46 +191,36 @@ type User {
email: String!
}
input MetricStatItem {
metricName: String!
range: FloatRange!
}
type Query {
clusters: [Cluster!]! # List of all clusters
tags: [Tag!]! # List of all tags
globalMetrics: [GlobalMetricListItem!]!
user(username: String!): User
allocatedNodes(cluster: String!): [Count!]!
job(id: ID!): Job
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
jobStats(id: ID!, metrics: [String!]): [JobStats!]!
scopedJobStats(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobStatsWithScope!]!
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate, numDurationBins: String, numMetricBins: Int): [JobsStatistics!]!
jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate): [JobsStatistics!]!
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
nodeMetricsList(cluster: String!, subCluster: String!, nodeFilter: String!, scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!, page: PageRequest, resolution: Int): NodesResultList!
}
type Mutation {
createTag(type: String!, name: String!, scope: String!): Tag!
createTag(type: String!, name: String!): Tag!
deleteTag(id: ID!): ID!
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
removeTagFromList(tagIds: [ID!]!): [Int!]!
updateConfiguration(name: String!, value: String!): String
}
type IntRangeOutput { from: Int!, to: Int! }
type TimeRangeOutput { range: String, from: Time!, to: Time! }
type TimeRangeOutput { from: Time!, to: Time! }
input JobFilter {
tags: [ID!]
@@ -295,7 +232,6 @@ input JobFilter {
cluster: StringInput
partition: StringInput
duration: IntRange
energy: FloatRange
minRunningFor: Int
@@ -305,14 +241,17 @@ input JobFilter {
startTime: TimeRange
state: [JobState!]
metricStats: [MetricStatItem!]
flopsAnyAvg: FloatRange
memBwAvg: FloatRange
loadAvg: FloatRange
memUsedMax: FloatRange
exclusive: Int
node: StringInput
}
input OrderByInput {
field: String!
type: String!,
order: SortDirectionEnum! = ASC
}
@@ -330,20 +269,15 @@ input StringInput {
in: [String!]
}
input IntRange { from: Int!, to: Int! }
input TimeRange { range: String, from: Time, to: Time }
input FloatRange {
from: Float!
to: Float!
}
input IntRange { from: Int!, to: Int! }
input FloatRange { from: Float!, to: Float! }
input TimeRange { from: Time, to: Time }
type JobResultList {
items: [Job!]!
offset: Int
limit: Int
count: Int
hasNextPage: Boolean
}
type JobLinkResultList {
@@ -360,7 +294,6 @@ type HistoPoint {
type MetricHistoPoints {
metric: String!
unit: String!
stat: String
data: [MetricHistoPoint!]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,17 @@
# Default environment settings for the packaged cc-backend service.
# NOTE(review): presumably this is the file installed as /etc/default/cc-backend
# and loaded by the systemd unit through EnvironmentFile= - confirm against the
# packaging spec.
# NOTE(review): of the variables below, only CONF_FILE is referenced by the
# visible systemd unit (ExecStart uses ${CONF_FILE}); the others look unused
# there - verify before relying on them.
CC_USER=clustercockpit
CC_GROUP=clustercockpit
CC_HOME=/tmp
LOG_DIR=/var/log
DATA_DIR=/var/run/cc-backend
MAX_OPEN_FILES=10000
CONF_DIR=/etc/cc-backend
# Path of the JSON configuration file handed to cc-backend via --config.
CONF_FILE=/etc/cc-backend/cc-backend.json
RESTART_ON_UPGRADE=true

View File

@@ -0,0 +1,12 @@
Package: cc-backend
Version: {VERSION}
Installed-Size: {INSTALLED_SIZE}
Architecture: {ARCH}
Maintainer: thomas.gruber@fau.de
Depends: libc6 (>= 2.2.1)
Build-Depends: debhelper-compat (= 13), git, golang-go, npm, yarn
Description: ClusterCockpit backend and web frontend
Homepage: https://github.com/ClusterCockpit/cc-backend
Source: cc-backend
Rules-Requires-Root: no

View File

@@ -0,0 +1,18 @@
# systemd unit that runs the ClusterCockpit backend (cc-backend) as a
# long-lived service under the dedicated clustercockpit account.
[Unit]
Description=ClusterCockpit backend and web frontend (cc-backend)
Documentation=https://github.com/ClusterCockpit/cc-backend
# Start only once the network is fully configured.
Wants=network-online.target
After=network-online.target

[Service]
# Provides CONF_FILE, which is expanded in ExecStart below.
EnvironmentFile=/etc/default/cc-backend
Type=simple
User=clustercockpit
Group=clustercockpit
Restart=on-failure
TimeoutStopSec=100
LimitNOFILE=infinity
# The server flag is required: without it cc-backend only performs
# initialization and command line handling and then returns, so the unit
# would stop immediately with a successful exit status (which
# Restart=on-failure does not restart).
ExecStart=/usr/bin/cc-backend --server --config ${CONF_FILE}

[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,70 @@
# RPM packaging recipe for cc-backend: builds the web frontend and the Go
# binary, installs binary, configuration, systemd unit and sysusers entry.
Name: cc-backend
Version: %{VERS}
Release: 1%{?dist}
Summary: ClusterCockpit backend and web frontend
License: MIT
Source0: %{name}-%{version}.tar.gz
#BuildRequires: go-toolset
#BuildRequires: systemd-rpm-macros
#BuildRequires: npm
Provides: %{name} = %{version}

%description
ClusterCockpit backend and web frontend

# The Go binary is linked with -s, so no debuginfo package is generated.
%global debug_package %{nil}

%prep
%autosetup

%build
#CURRENT_TIME=$(date +%Y-%m-%d:T%H:%M:\%S)
#LD_FLAGS="-s -X main.buildTime=${CURRENT_TIME} -X main.version=%{VERS}"
mkdir ./var
touch ./var/job.db
# Build the frontend bundle first, then the backend binary.
cd web/frontend && yarn install && yarn build && cd -
go build -ldflags="-s -X main.version=%{VERS}" ./cmd/cc-backend

%install
# Install cc-backend
#make PREFIX=%{buildroot} install
install -Dpm 755 cc-backend %{buildroot}/%{_bindir}/%{name}
install -Dpm 0600 configs/config.json %{buildroot}%{_sysconfdir}/%{name}/%{name}.json
# Integrate into system
install -Dpm 0644 build/package/%{name}.service %{buildroot}%{_unitdir}/%{name}.service
install -Dpm 0600 build/package/%{name}.config %{buildroot}%{_sysconfdir}/default/%{name}
install -Dpm 0644 build/package/%{name}.sysusers %{buildroot}%{_sysusersdir}/%{name}.conf

%check
# go test should be here... :)

%pre
# Create the service account before the payload is unpacked. The macro takes
# the package name followed by the path of the sysusers file; that file lives
# in build/package (the same one installed in the install section above), not
# in scripts/.
%sysusers_create_package %{name} build/package/%{name}.sysusers

%post
%systemd_post %{name}.service

%preun
%systemd_preun %{name}.service

%files
# Binary
%attr(-,clustercockpit,clustercockpit) %{_bindir}/%{name}
# Config
%dir %{_sysconfdir}/%{name}
%attr(0600,clustercockpit,clustercockpit) %config(noreplace) %{_sysconfdir}/%{name}/%{name}.json
# Systemd
%{_unitdir}/%{name}.service
%{_sysconfdir}/default/%{name}
%{_sysusersdir}/%{name}.conf

%changelog
* Mon Mar 07 2022 Thomas Gruber - 0.1
- Initial metric store implementation

View File

@@ -0,0 +1,2 @@
# sysusers.d entry declaring the clustercockpit service account
# (type "u" = user; ID "-" lets systemd-sysusers allocate the UID).
#Type Name ID GECOS Home directory Shell
u clustercockpit - "User for ClusterCockpit" /run/cc-backend /sbin/nologin

View File

@@ -1,33 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import "flag"
var (
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
)
// cliInit registers every command line flag of cc-backend on the default flag
// set of the standard flag package and then parses the process arguments into
// the package-level flag* variables.
//
// In the usage strings a back-quoted word is picked up by the flag package as
// the displayed name of the flag's argument in the generated help output.
func cliInit() {
	// Boolean switches.
	flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize sqlite database file, config.json and .env")
	flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
	flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'hpc_user' table with ldap")
	flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
	flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
	flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
	flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
	flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
	flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
	flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
	flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")

	// Flags that take a string argument.
	flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
	flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: <username>:[admin,support,manager,api,user]:<password>")
	flag.StringVar(&flagDelUser, "del-user", "", "Remove an existing user. Argument format: <username>")
	flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
	flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
	// The registered default is "warn"; the help text must name the same
	// level as the default (it previously claimed "info").
	flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug, info, warn (default), err, crit]`")

	flag.Parse()
}

View File

@@ -1,95 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"os"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
const envString = `
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
# You can generate your own keypair using the gen-keypair tool
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
`
const configString = `
{
"addr": "127.0.0.1:8080",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"jwts": {
"max-age": "2000h"
},
"apiAllowedIPs": [
"*"
],
"enable-resampling": {
"trigger": 30,
"resolutions": [
600,
300,
120,
60
]
},
"clusters": [
{
"name": "name",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2023-01-01T00:00:00Z",
"to": null
}
}
}
]
}
`
// initEnv bootstraps a fresh working directory for cc-backend: it writes a
// default config.json and .env, creates the ./var directory and sets up the
// sqlite3 database ./var/job.db through repository.MigrateDB.
//
// If ./var already exists the function backs out via log.Exit before touching
// anything, so an existing setup is never overwritten. Every failing step is
// reported through log.Abortf.
func initEnv() {
	if util.CheckFileExists("var") {
		log.Exit("Directory ./var already exists. Cautiously exiting application initialization.")
	}

	if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
		log.Abortf("Could not write default ./config.json with permissions '0o666'. Application initialization failed, exited.\nError: %s\n", err.Error())
	}

	// .env carries the JWT private key and the session secret, so it is
	// created readable and writable by the owner only.
	if err := os.WriteFile(".env", []byte(envString), 0o600); err != nil {
		log.Abortf("Could not write default ./.env file with permissions '0o600'. Application initialization failed, exited.\nError: %s\n", err.Error())
	}

	if err := os.Mkdir("var", 0o777); err != nil {
		log.Abortf("Could not create default ./var folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error())
	}

	// Running the migrations against the not yet existing file creates the
	// default database.
	err := repository.MigrateDB("sqlite3", "./var/job.db")
	if err != nil {
		log.Abortf("Could not initialize default sqlite3 database as './var/job.db'. Application initialization failed, exited.\nError: %s\n", err.Error())
	}
}

View File

@@ -1,52 +1,157 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"net"
"net/http"
"os"
"os/signal"
"runtime"
"runtime/debug"
"strings"
"sync"
"syscall"
"time"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/99designs/gqlgen/graphql/handler"
"github.com/99designs/gqlgen/graphql/playground"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/importer"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/taskManager"
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
"github.com/ClusterCockpit/cc-backend/internal/runtimeEnv"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/web"
"github.com/go-co-op/gocron"
"github.com/google/gops/agent"
"github.com/gorilla/handlers"
"github.com/gorilla/mux"
httpSwagger "github.com/swaggo/http-swagger"
_ "github.com/go-sql-driver/mysql"
_ "github.com/mattn/go-sqlite3"
)
const logoString = `
_____ _ _ ____ _ _ _
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
____ _ _ ____ _ _ _
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
| | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __|
| |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_
\_____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
\____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|_|
`
const envString = `
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
# You can generate your own keypair using the gen-keypair tool
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
`
const configString = `
{
"addr": "127.0.0.1:8080",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"clusters": [
{
"name": "name",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2023-01-01T00:00:00Z",
"to": null
}
}
}
]
}
`
var (
date string
commit string
version string
)
// initEnv bootstraps a fresh working directory for cc-backend: it writes a
// default config.json and .env, creates the ./var directory and sets up the
// sqlite3 database ./var/job.db through repository.MigrateDB.
//
// If ./var already exists the function prints a notice and exits with status
// 0 before touching anything, so an existing setup is never overwritten. Every
// failing step is reported through log.Fatalf.
func initEnv() {
	if util.CheckFileExists("var") {
		fmt.Print("Directory ./var already exists. Exiting!\n")
		os.Exit(0)
	}

	if err := os.WriteFile("config.json", []byte(configString), 0666); err != nil {
		log.Fatalf("Writing config.json failed: %s", err.Error())
	}

	// .env carries the JWT private key and the session secret, so it is
	// created readable and writable by the owner only.
	if err := os.WriteFile(".env", []byte(envString), 0600); err != nil {
		log.Fatalf("Writing .env failed: %s", err.Error())
	}

	if err := os.Mkdir("var", 0777); err != nil {
		log.Fatalf("Mkdir var failed: %s", err.Error())
	}

	// Running the migrations against the not yet existing file creates the
	// default database.
	err := repository.MigrateDB("sqlite3", "./var/job.db")
	if err != nil {
		log.Fatalf("Initialize job.db failed: %s", err.Error())
	}
}
func main() {
cliInit()
var flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagDev, flagVersion, flagLogDateTime bool
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
flag.Parse()
if flagVersion {
fmt.Print(logoString)
@@ -61,23 +166,23 @@ func main() {
// Apply config flags for pkg/log
log.Init(flagLogLevel, flagLogDateTime)
// If init flag set, run tasks here before any file dependencies cause errors
if flagInit {
initEnv()
log.Exit("Successfully setup environment!\n" +
"Please review config.json and .env and adjust it to your needs.\n" +
"Add your job-archive at ./var/job-archive.")
fmt.Print("Succesfully setup environment!\n")
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
fmt.Print("Add your job-archive at ./var/job-archive.\n")
os.Exit(0)
}
// See https://github.com/google/gops (Runtime overhead is almost zero)
if flagGops {
if err := agent.Listen(agent.Options{}); err != nil {
log.Abortf("Could not start gops agent with 'gops/agent.Listen(agent.Options{})'. Application startup failed, exited.\nError: %s\n", err.Error())
log.Fatalf("gops/agent.Listen failed: %s", err.Error())
}
}
if err := runtimeEnv.LoadEnv("./.env"); err != nil && !os.IsNotExist(err) {
log.Abortf("Could not parse existing .env file at location './.env'. Application startup failed, exited.\nError: %s\n", err.Error())
log.Fatalf("parsing './.env' file failed: %s", err.Error())
}
// Initialize sub-modules and handle command line flags.
@@ -95,134 +200,310 @@ func main() {
if flagMigrateDB {
err := repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB)
if err != nil {
log.Abortf("MigrateDB Failed: Could not migrate '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, repository.Version, err.Error())
log.Fatal(err)
}
log.Exitf("MigrateDB Success: Migrated '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, repository.Version)
}
if flagRevertDB {
err := repository.RevertDB(config.Keys.DBDriver, config.Keys.DB)
if err != nil {
log.Abortf("RevertDB Failed: Could not revert '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, (repository.Version - 1), err.Error())
}
log.Exitf("RevertDB Success: Reverted '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, (repository.Version - 1))
}
if flagForceDB {
err := repository.ForceDB(config.Keys.DBDriver, config.Keys.DB)
if err != nil {
log.Abortf("ForceDB Failed: Could not force '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, repository.Version, err.Error())
}
log.Exitf("ForceDB Success: Forced '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, repository.Version)
os.Exit(0)
}
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
db := repository.GetConnection()
var authentication *auth.Authentication
if !config.Keys.DisableAuthentication {
var err error
if authentication, err = auth.Init(); err != nil {
log.Fatalf("auth initialization failed: %v", err)
}
auth.Init()
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
authentication.SessionMaxAge = d
}
if flagNewUser != "" {
parts := strings.SplitN(flagNewUser, ":", 3)
if len(parts) != 3 || len(parts[0]) == 0 {
log.Abortf("Add User: Could not parse supplied argument format: No changes.\n"+
"Want: <username>:[admin,support,manager,api,user]:<password>\n"+
"Have: %s\n", flagNewUser)
log.Fatal("invalid argument format for user creation")
}
ur := repository.GetUserRepository()
if err := ur.AddUser(&schema.User{
Username: parts[0], Projects: make([]string, 0), Password: parts[2], Roles: strings.Split(parts[1], ","),
}); err != nil {
log.Abortf("Add User: Could not add new user authentication for '%s' and roles '%s'.\nError: %s\n", parts[0], parts[1], err.Error())
} else {
log.Printf("Add User: Added new user '%s' with roles '%s'.\n", parts[0], parts[1])
log.Fatalf("adding '%s' user authentication failed: %v", parts[0], err)
}
}
if flagDelUser != "" {
ur := repository.GetUserRepository()
if err := ur.DelUser(flagDelUser); err != nil {
log.Abortf("Delete User: Could not delete user '%s' from DB.\nError: %s\n", flagDelUser, err.Error())
} else {
log.Printf("Delete User: Deleted user '%s' from DB.\n", flagDelUser)
log.Fatalf("deleting user failed: %v", err)
}
}
authHandle := auth.GetAuthInstance()
if flagSyncLDAP {
if authHandle.LdapAuth == nil {
log.Abort("Sync LDAP: LDAP authentication is not configured, could not synchronize. No changes, exited.")
if authentication.LdapAuth == nil {
log.Fatal("cannot sync: LDAP authentication is not configured")
}
if err := authHandle.LdapAuth.Sync(); err != nil {
log.Abortf("Sync LDAP: Could not synchronize, failed with error.\nError: %s\n", err.Error())
if err := authentication.LdapAuth.Sync(); err != nil {
log.Fatalf("LDAP sync failed: %v", err)
}
log.Print("Sync LDAP: LDAP synchronization successfull.")
log.Info("LDAP sync successfull")
}
if flagGenJWT != "" {
ur := repository.GetUserRepository()
user, err := ur.GetUser(flagGenJWT)
if err != nil {
log.Abortf("JWT: Could not get supplied user '%s' from DB. No changes, exited.\nError: %s\n", flagGenJWT, err.Error())
log.Fatalf("could not get user from JWT: %v", err)
}
if !user.HasRole(schema.RoleApi) {
log.Warnf("JWT: User '%s' does not have the role 'api'. REST API endpoints will return error!\n", user.Username)
log.Warnf("user '%s' does not have the API role", user.Username)
}
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
jwt, err := authentication.JwtAuth.ProvideJWT(user)
if err != nil {
log.Abortf("JWT: User '%s' found in DB, but failed to provide JWT.\nError: %s\n", user.Username, err.Error())
log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
}
log.Printf("JWT: Successfully generated JWT for user '%s': %s\n", user.Username, jwt)
fmt.Printf("MAIN > JWT for '%s': %s\n", user.Username, jwt)
}
} else if flagNewUser != "" || flagDelUser != "" {
log.Abort("Error: Arguments '--add-user' and '--del-user' can only be used if authentication is enabled. No changes, exited.")
log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled")
}
if err := archive.Init(config.Keys.Archive, config.Keys.DisableArchive); err != nil {
log.Abortf("Init: Failed to initialize archive.\nError: %s\n", err.Error())
log.Fatalf("failed to initialize archive: %s", err.Error())
}
if err := metricdata.Init(); err != nil {
log.Abortf("Init: Failed to initialize metricdata repository.\nError %s\n", err.Error())
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
}
if flagReinitDB {
if err := importer.InitDB(); err != nil {
log.Abortf("Init DB: Failed to re-initialize repository DB.\nError: %s\n", err.Error())
} else {
log.Print("Init DB: Sucessfully re-initialized repository DB.")
log.Fatalf("failed to re-initialize repository DB: %s", err.Error())
}
}
if flagImportJob != "" {
if err := importer.HandleImportFlag(flagImportJob); err != nil {
log.Abortf("Import Job: Job import failed.\nError: %s\n", err.Error())
} else {
log.Printf("Import Job: Imported Job '%s' into DB.\n", flagImportJob)
log.Fatalf("job import failed: %s", err.Error())
}
}
if !flagServer {
log.Exit("No errors, server flag not set. Exiting cc-backend.")
return
}
archiver.Start(repository.GetJobRepository())
taskManager.Start()
serverInit()
// Setup the http.Handler/Router used by the server
jobRepo := repository.GetJobRepository()
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
if os.Getenv("DEBUG") != "1" {
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
// The problem with this is that then, no more stacktrace is printed to stderr.
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
switch e := err.(type) {
case string:
return fmt.Errorf("MAIN > Panic: %s", e)
case error:
return fmt.Errorf("MAIN > Panic caused by: %w", e)
}
return errors.New("MAIN > Internal server error (panic)")
})
}
api := &api.RestApi{
JobRepository: jobRepo,
Resolver: resolver,
MachineStateDir: config.Keys.MachineStateDir,
Authentication: authentication,
}
r := mux.NewRouter()
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
r.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo})
}).Methods(http.MethodGet)
r.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
})
r.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
})
secured := r.PathPrefix("/").Subrouter()
if !config.Keys.DisableAuthentication {
r.Handle("/login", authentication.Login(
// On success:
http.RedirectHandler("/", http.StatusTemporaryRedirect),
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
})
})).Methods(http.MethodPost)
r.Handle("/jwt-login", authentication.Login(
// On success:
http.RedirectHandler("/", http.StatusTemporaryRedirect),
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
})
}))
r.Handle("/logout", authentication.Logout(
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusOK)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Bye - ClusterCockpit",
MsgType: "alert-info",
Message: "Logout successful",
Build: buildInfo,
})
}))).Methods(http.MethodPost)
secured.Use(func(next http.Handler) http.Handler {
return authentication.Auth(
// On success;
next,
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Authentication failed - ClusterCockpit",
MsgType: "alert-danger",
Message: err.Error(),
Build: buildInfo,
})
})
})
}
if flagDev {
r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
r.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
}
secured.Handle("/query", graphQLEndpoint)
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
routerConfig.HandleSearchBar(rw, r, buildInfo)
})
// Mount all /monitoring/... and /api/... routes.
routerConfig.SetupRoutes(secured, buildInfo)
api.MountRoutes(secured)
if config.Keys.EmbedStaticFiles {
if i, err := os.Stat("./var/img"); err == nil {
if i.IsDir() {
log.Info("Use local directory for static images")
r.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
}
}
r.PathPrefix("/").Handler(web.ServeFiles())
} else {
r.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
}
r.Use(handlers.CompressHandler)
r.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
r.Use(handlers.CORS(
handlers.AllowCredentials(),
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
handlers.AllowedOrigins([]string{"*"})))
handler := handlers.CustomLoggingHandler(io.Discard, r, func(_ io.Writer, params handlers.LogFormatterParams) {
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
log.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
} else {
log.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
}
})
var wg sync.WaitGroup
server := http.Server{
ReadTimeout: 10 * time.Second,
WriteTimeout: 10 * time.Second,
Handler: handler,
Addr: config.Keys.Addr,
}
// Start http or https server
listener, err := net.Listen("tcp", config.Keys.Addr)
if err != nil {
log.Fatalf("starting http listener failed: %v", err)
}
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
go func() {
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
}()
}
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
if err != nil {
log.Fatalf("loading X509 keypair failed: %v", err)
}
listener = tls.NewListener(listener, &tls.Config{
Certificates: []tls.Certificate{cert},
CipherSuites: []uint16{
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
},
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
})
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
} else {
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
}
// Because this program will want to bind to a privileged port (like 80), the listener must
// be established first, then the user can be changed, and after that,
// the actual http server can be started.
if err = runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
log.Fatalf("error while preparing server start: %s", err.Error())
}
wg.Add(1)
go func() {
defer wg.Done()
serverStart()
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
log.Fatalf("starting server failed: %v", err)
}
}()
wg.Add(1)
@@ -233,15 +514,117 @@ func main() {
<-sigs
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
serverShutdown()
// First shut down the server gracefully (waiting for all ongoing requests)
server.Shutdown(context.Background())
taskManager.Shutdown()
// Then, wait for any async archivings still pending...
api.JobRepository.WaitForArchiving()
}()
s := gocron.NewScheduler(time.Local)
if config.Keys.StopJobsExceedingWalltime > 0 {
log.Info("Register undead jobs service")
s.Every(1).Day().At("3:00").Do(func() {
err = jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
if err != nil {
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
}
runtime.GC()
})
}
var cfg struct {
Compression int `json:"compression"`
Retention schema.Retention `json:"retention"`
}
cfg.Retention.IncludeDB = true
if err = json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
log.Warn("Error while unmarshaling raw config json")
}
switch cfg.Retention.Policy {
case "delete":
log.Info("Register retention delete service")
s.Every(1).Day().At("4:00").Do(func() {
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
jobs, err := jobRepo.FindJobsBetween(0, startTime)
if err != nil {
log.Warnf("Error while looking for retention jobs: %s", err.Error())
}
archive.GetHandle().CleanUp(jobs)
if cfg.Retention.IncludeDB {
cnt, err := jobRepo.DeleteJobsBefore(startTime)
if err != nil {
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
} else {
log.Infof("Retention: Removed %d jobs from db", cnt)
}
if err = jobRepo.Optimize(); err != nil {
log.Errorf("Error occured in db optimization: %s", err.Error())
}
}
})
case "move":
log.Info("Register retention move service")
s.Every(1).Day().At("4:00").Do(func() {
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
jobs, err := jobRepo.FindJobsBetween(0, startTime)
if err != nil {
log.Warnf("Error while looking for retention jobs: %s", err.Error())
}
archive.GetHandle().Move(jobs, cfg.Retention.Location)
if cfg.Retention.IncludeDB {
cnt, err := jobRepo.DeleteJobsBefore(startTime)
if err != nil {
log.Errorf("Error while deleting retention jobs from db: %v", err)
} else {
log.Infof("Retention: Removed %d jobs from db", cnt)
}
if err = jobRepo.Optimize(); err != nil {
log.Errorf("Error occured in db optimization: %v", err)
}
}
})
}
if cfg.Compression > 0 {
log.Info("Register compression service")
s.Every(1).Day().At("5:00").Do(func() {
var jobs []*schema.Job
ar := archive.GetHandle()
startTime := time.Now().Unix() - int64(cfg.Compression*24*3600)
lastTime := ar.CompressLast(startTime)
if startTime == lastTime {
log.Info("Compression Service - Complete archive run")
jobs, err = jobRepo.FindJobsBetween(0, startTime)
} else {
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
}
if err != nil {
log.Warnf("Error while looking for compression jobs: %v", err)
}
ar.Compress(jobs)
})
}
s.StartAsync()
if os.Getenv("GOGC") == "" {
debug.SetGCPercent(25)
}
runtimeEnv.SystemdNotifiy(true, "running")
wg.Wait()
log.Print("Graceful shutdown completed!")
log.Print("Gracefull shutdown completed!")
}

View File

@@ -1,330 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"strings"
"time"
"github.com/99designs/gqlgen/graphql/handler"
"github.com/99designs/gqlgen/graphql/handler/transport"
"github.com/99designs/gqlgen/graphql/playground"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
"github.com/ClusterCockpit/cc-backend/web"
"github.com/gorilla/handlers"
"github.com/gorilla/mux"
httpSwagger "github.com/swaggo/http-swagger"
)
// Package-level server state shared by serverInit, serverStart and
// serverShutdown.
var (
// router is created in serverInit and wrapped by the logging handler in
// serverStart.
router *mux.Router
// server is assigned in serverStart and shut down in serverShutdown.
server *http.Server
// apiHandle is created in serverInit and used there to mount the API routes.
apiHandle *api.RestApi
)
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusUnauthorized)
json.NewEncoder(rw).Encode(map[string]string{
"status": http.StatusText(http.StatusUnauthorized),
"error": err.Error(),
})
}
// serverInit builds the package-level router and REST API handle.
//
// It initialises the GraphQL resolver and endpoint, registers the public
// pages (/login, /imprint, /privacy), creates one subrouter per API family,
// attaches the authentication middleware unless authentication is disabled
// in the configuration, mounts all application routes, registers the static
// file handler and finally installs the compression, panic-recovery and CORS
// middleware on the root router. serverStart serves the router assigned
// here, so serverInit has to run first.
func serverInit() {
	// Setup the http.Handler/Router used by the server
	graph.Init()
	resolver := graph.GetResolverInstance()
	graphQLServer := handler.New(
		generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))

	// Only the POST transport is enabled; SSE and websocket stay disabled.
	// graphQLServer.AddTransport(transport.SSE{})
	graphQLServer.AddTransport(transport.POST{})
	// graphQLServer.AddTransport(transport.Websocket{
	// 	KeepAlivePingInterval: 10 * time.Second,
	// 	Upgrader: websocket.Upgrader{
	// 		CheckOrigin: func(r *http.Request) bool {
	// 			return true
	// 		},
	// 	},
	// })

	if os.Getenv("DEBUG") != "1" {
		// Having this handler means that an error message is returned via GraphQL instead of the connection simply being closed.
		// The problem with this is that then, no more stacktrace is printed to stderr.
		graphQLServer.SetRecoverFunc(func(ctx context.Context, err any) error {
			switch e := err.(type) {
			case string:
				return fmt.Errorf("MAIN > Panic: %s", e)
			case error:
				return fmt.Errorf("MAIN > Panic caused by: %s", e.Error())
			}
			return errors.New("MAIN > Internal server error (panic)")
		})
	}

	authHandle := auth.GetAuthInstance()
	apiHandle = api.New()
	router = mux.NewRouter()
	buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}

	// info is handed to the login template; it tells the template whether
	// OpenID Connect endpoints were registered.
	info := map[string]any{}
	info["hasOpenIDConnect"] = false
	if config.Keys.OpenIDConfig != nil {
		openIDConnect := auth.NewOIDC(authHandle)
		openIDConnect.RegisterEndpoints(router)
		info["hasOpenIDConnect"] = true
	}

	// Public pages that are reachable without authentication.
	router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
		rw.Header().Add("Content-Type", "text/html; charset=utf-8")
		log.Debugf("##%v##", info)
		web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
	}).Methods(http.MethodGet)
	router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
		rw.Header().Add("Content-Type", "text/html; charset=utf-8")
		web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
	})
	router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
		rw.Header().Add("Content-Type", "text/html; charset=utf-8")
		web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
	})

	// One subrouter per route family so that each can get its own
	// authentication middleware below.
	secured := router.PathPrefix("/").Subrouter()
	securedapi := router.PathPrefix("/api").Subrouter()
	userapi := router.PathPrefix("/userapi").Subrouter()
	configapi := router.PathPrefix("/config").Subrouter()
	frontendapi := router.PathPrefix("/frontend").Subrouter()

	if !config.Keys.DisableAuthentication {
		// Shared failure handler for both login endpoints: re-render the
		// login page with a warning and HTTP 401.
		loginFailed := func(rw http.ResponseWriter, r *http.Request, err error) {
			rw.Header().Add("Content-Type", "text/html; charset=utf-8")
			rw.WriteHeader(http.StatusUnauthorized)
			web.RenderTemplate(rw, "login.tmpl", &web.Page{
				Title:   "Login failed - ClusterCockpit",
				MsgType: "alert-warning",
				Message: err.Error(),
				Build:   buildInfo,
				Infos:   info,
			})
		}

		// On success: Handled within Login()
		router.Handle("/login", authHandle.Login(loginFailed)).Methods(http.MethodPost)
		router.Handle("/jwt-login", authHandle.Login(loginFailed))

		router.Handle("/logout", authHandle.Logout(
			http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
				rw.Header().Add("Content-Type", "text/html; charset=utf-8")
				rw.WriteHeader(http.StatusOK)
				web.RenderTemplate(rw, "login.tmpl", &web.Page{
					Title:   "Bye - ClusterCockpit",
					MsgType: "alert-info",
					Message: "Logout successful",
					Build:   buildInfo,
					Infos:   info,
				})
			}))).Methods(http.MethodPost)

		secured.Use(func(next http.Handler) http.Handler {
			return authHandle.Auth(
				// On success;
				next,
				// On failure:
				func(rw http.ResponseWriter, r *http.Request, err error) {
					// Declare the content type explicitly, like every other
					// handler that renders login.tmpl; it has to be set
					// before WriteHeader to take effect.
					rw.Header().Add("Content-Type", "text/html; charset=utf-8")
					rw.WriteHeader(http.StatusUnauthorized)
					web.RenderTemplate(rw, "login.tmpl", &web.Page{
						Title:    "Authentication failed - ClusterCockpit",
						MsgType:  "alert-danger",
						Message:  err.Error(),
						Build:    buildInfo,
						Infos:    info,
						Redirect: r.RequestURI,
					})
				})
		})

		// The API subrouters answer authentication failures with JSON
		// (onFailureResponse) instead of the HTML login page.
		securedapi.Use(func(next http.Handler) http.Handler {
			return authHandle.AuthApi(
				// On success;
				next,
				// On failure: JSON Response
				onFailureResponse)
		})
		userapi.Use(func(next http.Handler) http.Handler {
			return authHandle.AuthUserApi(
				// On success;
				next,
				// On failure: JSON Response
				onFailureResponse)
		})
		configapi.Use(func(next http.Handler) http.Handler {
			return authHandle.AuthConfigApi(
				// On success;
				next,
				// On failure: JSON Response
				onFailureResponse)
		})
		frontendapi.Use(func(next http.Handler) http.Handler {
			return authHandle.AuthFrontendApi(
				// On success;
				next,
				// On failure: JSON Response
				onFailureResponse)
		})
	}

	// Developer tooling is only exposed when the dev flag is set.
	if flagDev {
		router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
		router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
			httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
	}

	secured.Handle("/query", graphQLServer)

	// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
	secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
		routerConfig.HandleSearchBar(rw, r, buildInfo)
	})

	// Mount all /monitoring/... and /api/... routes.
	routerConfig.SetupRoutes(secured, buildInfo)
	apiHandle.MountApiRoutes(securedapi)
	apiHandle.MountUserApiRoutes(userapi)
	apiHandle.MountConfigApiRoutes(configapi)
	apiHandle.MountFrontendApiRoutes(frontendapi)

	// Static assets: either the files embedded in the binary (optionally
	// with images served from a local ./var/img directory) or a directory
	// taken from the configuration.
	if config.Keys.EmbedStaticFiles {
		if i, err := os.Stat("./var/img"); err == nil {
			if i.IsDir() {
				log.Info("Use local directory for static images")
				router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
			}
		}
		router.PathPrefix("/").Handler(web.ServeFiles())
	} else {
		router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
	}

	router.Use(handlers.CompressHandler)
	router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
	router.Use(handlers.CORS(
		handlers.AllowCredentials(),
		handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
		handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
		handlers.AllowedOrigins([]string{"*"})))
}
// serverStart wraps the package-level router in a request-logging handler,
// opens the TCP listener on config.Keys.Addr (wrapped in TLS when both a
// certificate and a key file are configured), optionally starts an HTTP
// redirect listener on port 80, drops privileges and then serves requests.
// It blocks in server.Serve and returns once that call returns.
func serverStart() {
	// The writer is discarded because the formatter below sends every
	// request line to the project logger at debug level instead.
	handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
		log.Debugf("%s %s (%d, %.02fkb, %dms)",
			params.Request.Method, params.URL.RequestURI(),
			params.StatusCode, float32(params.Size)/1024,
			time.Since(params.TimeStamp).Milliseconds())
	})

	server = &http.Server{
		ReadTimeout:  20 * time.Second,
		WriteTimeout: 20 * time.Second,
		Handler:      handler,
		Addr:         config.Keys.Addr,
	}

	// Start http or https server
	// NOTE(review): the code below relies on log.Abortf terminating the
	// process; confirm against pkg/log.
	listener, err := net.Listen("tcp", config.Keys.Addr)
	if err != nil {
		log.Abortf("Server Start: Starting http listener on '%s' failed.\nError: %s\n", config.Keys.Addr, err.Error())
	}

	if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
		go func() {
			redirect := http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently)
			// Report a failing redirect listener instead of dropping the
			// error; the main server keeps running either way.
			if err := http.ListenAndServe(":80", redirect); err != nil {
				log.Errorf("Server Start: HTTP redirect listener on ':80' failed: %s", err.Error())
			}
		}()
	}

	if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
		cert, err := tls.LoadX509KeyPair(
			config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
		if err != nil {
			log.Abortf("Server Start: Loading X509 keypair failed. Check options 'https-cert-file' and 'https-key-file' in 'config.json'.\nError: %s\n", err.Error())
		}
		listener = tls.NewListener(listener, &tls.Config{
			Certificates: []tls.Certificate{cert},
			CipherSuites: []uint16{
				tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
				tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
			},
			MinVersion:               tls.VersionTLS12,
			PreferServerCipherSuites: true,
		})
		log.Printf("HTTPS server listening at %s...\n", config.Keys.Addr)
	} else {
		log.Printf("HTTP server listening at %s...\n", config.Keys.Addr)
	}

	//
	// Because this program will want to bind to a privileged port (like 80), the listener must
	// be established first, then the user can be changed, and after that,
	// the actual http server can be started.
	if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
		log.Abortf("Server Start: Error while preparing server start.\nError: %s\n", err.Error())
	}

	// http.ErrServerClosed is the regular result of a shutdown and is not
	// treated as a failure.
	if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
		log.Abortf("Server Start: Starting server failed.\nError: %s\n", err.Error())
	}
}
// serverShutdown stops the HTTP server gracefully and then waits for pending
// job archiving to finish.
func serverShutdown() {
	// First shut down the server gracefully (waiting for all ongoing requests).
	// server is only assigned inside serverStart, so guard against a shutdown
	// request that arrives before that assignment happened.
	if server != nil {
		if err := server.Shutdown(context.Background()); err != nil {
			log.Errorf("Server Shutdown: graceful shutdown failed: %s", err.Error())
		}
	}

	// Then, wait for any async archivings still pending...
	archiver.WaitForArchiving()
}

93
configs/README.md Normal file
View File

@@ -0,0 +1,93 @@
## Intro
cc-backend requires a configuration file that specifies the cluster systems to be used.
To override the default, specify the location of a json configuration file with the `-config <file path>` command line option.
All security-related configurations, e.g. keys and passwords, are set using
environment variables.
It is supported to set these by means of a `.env` file in the project root.
## Configuration Options
* `addr`: Type string. Address where the http (or https) server will listen on (for example: 'localhost:80'). Default `:8080`.
* `apiAllowedIPs`: Type string array. Addresses from which the secured API endpoints (/users and other auth related endpoints) can be reached
* `user`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
* `group`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
* `disable-authentication`: Type bool. Disable authentication (for everything: API, Web-UI, ...). Default `false`.
* `embed-static-files`: Type bool. If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not. Default `true`.
* `static-files`: Type string. Folder where static assets can be found, if `embed-static-files` is `false`. No default.
* `db-driver`: Type string. 'sqlite3' or 'mysql' (mysql will work for mariadb as well). Default `sqlite3`.
* `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`.
* `job-archive`: Type object.
- `kind`: Type string. At the moment only `file` is supported as value.
- `path`: Type string. Path to the job-archive. Default: `./var/job-archive`.
- `compression`: Type integer. Setup automatic compression for jobs older than number of days.
- `retention`: Type object.
- `policy`: Type string (required). Retention policy. Possible values none, delete,
move.
- `includeDB`: Type boolean. Also remove jobs from database.
- `age`: Type integer. Act on jobs with startTime older than age (in days).
- `location`: Type string. The target directory for retention. Only applicable for retention policy move.
* `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`.
* `validate`: Type bool. Validate all input json documents against json schema.
* `session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`.
* `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates.
* `redirect-http-to`: Type string. If not the empty string and `addr` does not end in ":80", redirect every request incoming at port 80 to that url.
* `machine-state-dir`: Type string. Where to store MachineState files. TODO: Explain in more detail!
* `stop-jobs-exceeding-walltime`: Type int. If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job. Default `0`.
* `short-running-jobs-duration`: Type int. Do not show running jobs shorter than X seconds. Default `300`.
* `jwts`: Type object (required). For JWT Authentication.
- `max-age`: Type string (required). Configure how long a token is valid. As string parsable by time.ParseDuration().
- `cookieName`: Type string. Cookie that should be checked for a JWT token.
- `validateUser`: Type boolean. Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.
- `trustedIssuer`: Type string. Issuer that should be accepted when validating external JWTs.
- `syncUserOnLogin`: Type boolean. Add non-existent user to DB at login attempt with values provided in JWT.
* `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`.
- `url`: Type string (required). URL of LDAP directory server.
- `user_base`: Type string (required). Base DN of user tree root.
- `search_dn`: Type string (required). DN for authenticating LDAP admin account with general read rights.
- `user_bind`: Type string (required). Expression used to authenticate users via LDAP bind. Must contain `uid={username}`.
- `user_filter`: Type string (required). Filter to extract users for syncing.
- `username_attr`: Type string. Attribute with full user name. Defaults to `gecos` if not provided.
- `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.
- `sync_del_old_users`: Type boolean. Delete obsolete users in database.
- `syncUserOnLogin`: Type boolean. Add non-existent user to DB at login attempt if user exists in Ldap directory.
* `clusters`: Type array of objects (required)
- `name`: Type string. The name of the cluster.
- `metricDataRepository`: Type object with properties: `kind` (Type string, can be one of `cc-metric-store`, `influxdb` ), `url` (Type string), `token` (Type string)
- `filterRanges` Type object. This option controls the slider ranges for the UI controls of numNodes, duration, and startTime. Example:
```
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
```
* `ui-defaults`: Type object. Default configuration for ui views. If overwritten, all options must be provided! Most options can be overwritten by the user via the web interface.
- `analysis_view_histogramMetrics`: Type string array. Metrics to show as job count histograms in analysis view. Default `["flops_any", "mem_bw", "mem_used"]`.
- `analysis_view_scatterPlotMetrics`: Type array of string array. Initial
scatter plot configuration in analysis view. Default `[["flops_any", "mem_bw"], ["flops_any", "cpu_load"], ["cpu_load", "mem_bw"]]`.
- `job_view_nodestats_selectedMetrics`: Type string array. Initial metrics shown in node statistics table of single job view. Default `["flops_any", "mem_bw", "mem_used"]`.
- `job_view_polarPlotMetrics`: Type string array. Metrics shown in polar plot of single job view. Default `["flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"]`.
- `job_view_selectedMetrics`: Type string array. Default `["flops_any", "mem_bw", "mem_used"]`.
- `plot_general_colorBackground`: Type bool. Color plot background according to job average threshold limits. Default `true`.
- `plot_general_colorscheme`: Type string array. Initial color scheme. Default `"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"`.
- `plot_general_lineWidth`: Type int. Initial linewidth. Default `3`.
- `plot_list_jobsPerPage`: Type int. Jobs shown per page in job lists. Default `50`.
- `plot_list_selectedMetrics`: Type string array. Initial metric plots shown in jobs lists. Default `"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"`.
- `plot_view_plotsPerRow`: Type int. Number of plots per row in single job view. Default `3`.
- `plot_view_showPolarplot`: Type bool. Option to toggle polar plot in single job view. Default `true`.
- `plot_view_showRoofline`: Type bool. Option to toggle roofline plot in single job view. Default `true`.
- `plot_view_showStatTable`: Type bool. Option to toggle the node statistic table in single job view. Default `true`.
- `system_view_selectedMetric`: Type string. Initial metric shown in system view. Default `cpu_load`.
Some of the `ui-defaults` values can be appended by `:<clustername>` in order to have different settings depending on the current cluster. Those are notably `job_view_nodestats_selectedMetrics`, `job_view_polarPlotMetrics`, `job_view_selectedMetrics` and `plot_list_selectedMetrics`.
## Environment Variables
An example env file is found in this directory. Copy it to `.env` in the project root and adapt it for your needs.
* `JWT_PUBLIC_KEY` and `JWT_PRIVATE_KEY`: Base64 encoded Ed25519 keys used for JSON Web Token (JWT) authentication. You can generate your own keypair using `go run ./cmd/gen-keypair/gen-keypair.go`. More information in [README_TOKENS.md](./README_TOKENS.md).
* `SESSION_KEY`: Some random bytes used as secret for cookie-based sessions.
* `LDAP_ADMIN_PASSWORD`: The LDAP admin user password (optional).
* `CROSS_LOGIN_JWT_HS512_KEY`: Used for token based logins via another authentication service.
* `LOGLEVEL`: Can be `err`, `warn`, `info` or `debug` (optional, `warn` by default). Can be used to reduce logging.

51
configs/README_TOKENS.md Normal file
View File

@@ -0,0 +1,51 @@
## Introduction
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs.
JSON Web Token (JWT) is an open standard (RFC 7519) that defines a compact and self-contained way for securely transmitting information between parties as a JSON object.
This information can be verified and trusted because it is digitally signed.
In ClusterCockpit JWTs are signed with an Ed25519 (EdDSA) public/private key pair.
Because tokens are signed using public/private key pairs, the signature also certifies that only the party holding the private key is the one that signed it.
Expiration of the generated tokens as well as the max. length of a browser session can be configured in the `config.json` file described [here](./README.md).
The [Ed25519](https://ed25519.cr.yp.to/) algorithm for signatures was used because it is compatible with other tools that require authentication, such as NATS.io, and because these elliptic-curve methods provide similar security with smaller keys compared to something like RSA. They are slightly more expensive to validate, but that effect is negligible.
## JWT Payload
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
Currently ClusterCockpit sets the following claims:
* `iat`: Issued at claim. The “iat” claim is used to identify the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
* `roles`: An array of strings specifying the roles set for the subject.
* `exp`: Expiration date of the token (only if explicitly configured)
It is important to know that JWTs are not encrypted, only signed. This means that outsiders cannot create new JWTs or modify existing ones, but they are able to read out the username.
## Workflow
1. Create a new Ed25519 public/private keypair:
```
$ go build ./cmd/gen-keypair/
$ ./gen-keypair
```
2. Add keypair in your `.env` file. A template can be found in `./configs`.
When a user logs in via the `/login` page using a browser, a session cookie (secured using the random bytes in the `SESSION_KEY` env. variable, which you should change as well) is used for all requests after the successful login. The JWTs make it easier to use the APIs of ClusterCockpit using scripts or other external programs. The token is specified in the `Authorization` HTTP header using the [Bearer schema](https://datatracker.ietf.org/doc/html/rfc6750) (there is an example below). Tokens can be issued to users from the configuration view in the Web-UI or the command line. In order to use the token for API endpoints such as `/api/jobs/start_job/`, the user that executes it needs to have the `api` role. Regular users can only perform read-only queries and only look at data connected to jobs they started themselves.
## cc-metric-store
The [cc-metric-store](https://github.com/ClusterCockpit/cc-metric-store) also uses JWTs for authentication. As it does not issue new tokens, it does not need to know the private key. The public key of the keypair that is used to generate the JWTs that grant access to the `cc-metric-store` can be specified in its `config.json`. When configuring the `metricDataRepository` object in the `cluster.json` file, you can put a token issued by ClusterCockpit itself.
## Setup user and JWT token for REST API authorization
1. Create user:
```
$ ./cc-backend --add-user <username>:api:<password> --no-server
```
2. Issue token for user:
```
$ ./cc-backend --jwt <username> --no-server
```
3. Use the issued token on the client side:
```
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
```

View File

@@ -1,70 +1,56 @@
{
"addr": "127.0.0.1:8080",
"short-running-jobs-duration": 300,
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"jwts": {
"max-age": "2000h"
},
"enable-resampling": {
"trigger": 30,
"resolutions": [
600,
300,
120,
60
]
},
"apiAllowedIPs": [
"*"
],
"emission-constant": 317,
"clusters": [
{
"name": "fritz",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
"addr": "127.0.0.1:8080",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
{
"name": "alex",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
"jwts": {
"max-age": "2000h"
},
"clusters": [
{
"name": "fritz",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
{
"name": "alex",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
}
}
}
]
]
}

View File

@@ -1,69 +0,0 @@
{
"addr": "127.0.0.1:8080",
"short-running-jobs-duration": 300,
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"jwts": {
"max-age": "2000h"
},
"db-driver": "mysql",
"db": "clustercockpit:demo@tcp(127.0.0.1:3306)/clustercockpit",
"enable-resampling": {
"trigger": 30,
"resolutions": [
600,
300,
120,
60
]
},
"emission-constant": 317,
"clusters": [
{
"name": "fritz",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
},
{
"name": "alex",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
}
]
}

View File

@@ -1,62 +1,50 @@
{
"addr": "0.0.0.0:443",
"ldap": {
"url": "ldaps://test",
"user_base": "ou=people,ou=hpc,dc=test,dc=de",
"search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de",
"user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de",
"user_filter": "(&(objectclass=posixAccount))"
},
"https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
"https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
"user": "clustercockpit",
"group": "clustercockpit",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"validate": false,
"apiAllowedIPs": [
"*"
],
"clusters": [
{
"name": "test",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": "eyJhbGciOiJF-E-pQBQ"
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
"addr": "0.0.0.0:443",
"ldap": {
"url": "ldaps://test",
"user_base": "ou=people,ou=hpc,dc=test,dc=de",
"search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de",
"user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de",
"user_filter": "(&(objectclass=posixAccount))"
},
"https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
"https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
"user": "clustercockpit",
"group": "clustercockpit",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"validate": true,
"clusters": [
{
"name": "test",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": "eyJhbGciOiJF-E-pQBQ"
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
}
}
}
],
"jwts": {
"cookieName": "",
"validateUser": false,
"max-age": "2000h",
"trustedIssuer": ""
},
"enable-resampling": {
"trigger": 30,
"resolutions": [
600,
300,
120,
60
]
},
"short-running-jobs-duration": 300
],
"jwts": {
"cookieName": "",
"validateUser": false,
"max-age": "2000h",
"trustedIssuer": ""
},
"short-running-jobs-duration": 300
}

View File

@@ -1,12 +0,0 @@
{
"clusters": [
{
"name": "fritz",
"default_metrics": "cpu_load, flops_any, core_power, lustre_open, mem_used, mem_bw, net_bytes_in"
},
{
"name": "alex",
"default_metrics": "flops_any, mem_bw, mem_used, vectorization_ratio"
}
]
}

View File

@@ -117,12 +117,10 @@ foreach my $ln (split("\n", $topo)) {
my $node;
my @sockets;
my @nodeCores;
foreach my $socket ( @{$DOMAINS{socket}} ) {
push @sockets, "[".join(",", @{$socket})."]";
push @nodeCores, join(",", @{$socket});
$node .= join(",", @{$socket})
}
$node = join(",", @nodeCores);
$INFO{sockets} = join(",\n", @sockets);
my @memDomains;
@@ -214,27 +212,9 @@ print <<"END";
"socketsPerNode": $INFO{socketsPerNode},
"coresPerSocket": $INFO{coresPerSocket},
"threadsPerCore": $INFO{threadsPerCore},
"flopRateScalar": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"value": $flopsScalar
},
"flopRateSimd": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"value": $flopsSimd
},
"memoryBandwidth": {
"unit": {
"base": "B/s",
"prefix": "G"
},
"value": $memBw
},
"flopRateScalar": $flopsScalar,
"flopRateSimd": $flopsSimd,
"memoryBandwidth": $memBw,
"nodes": "<FILL IN NODE RANGES>",
"topology": {
"node": [$node],

View File

@@ -0,0 +1,38 @@
# Release versions
Versions are marked according to [semantic versioning](https://semver.org).
Each version embeds the following static assets in the binary:
* Web frontend with javascript files and all static assets.
* Golang template files for server-side rendering.
* JSON schema files for validation.
* Database migration files.
The remaining external assets are:
* The SQL database used.
* The job archive
* The configuration files `config.json` and `.env`.
The external assets are versioned with integer IDs.
This means that each release binary is bound to specific versions of the SQL
database and the job archive.
The configuration file is checked against the current schema at startup.
The `-migrate-db` command line switch can be used to upgrade the SQL database
to migrate from a previous version to the latest one.
We offer a separate tool `archive-migration` to migrate an existing job archive
from the previous to the latest version.
# Versioning of APIs
cc-backend provides two API backends:
* A REST API for querying jobs.
* A GraphQL API for data exchange between web frontend and cc-backend.
The REST API will also be versioned. We still have to decide whether we will also
support older REST API versions by versioning the endpoint URLs.
The GraphQL API is for internal use and will not be versioned.
# How to build
In general it is recommended to use the provided release binary.
In case you want to build `cc-backend` yourself, please always use the provided makefile. This will ensure
that the frontend is also built correctly and that the version is encoded in the binary.

234
docs/Hands-on.md Normal file
View File

@@ -0,0 +1,234 @@
# Hands-on setup ClusterCockpit from scratch (w/o docker)
## Prerequisites
* perl
* go
* npm
* Optional: curl
* Script migrateTimestamps.pl
## Documentation
You find READMEs or api docs in
* ./cc-backend/configs
* ./cc-backend/init
* ./cc-backend/api
## ClusterCockpit configuration files
### cc-backend
* `./.env` Passwords and Tokens set in the environment
* `./config.json` Configuration options for cc-backend
### cc-metric-store
* `./config.json` Optional to overwrite configuration options
### cc-metric-collector
Not yet included in the hands-on setup.
## Setup Components
Start by creating a base folder for all of the following steps.
* `mkdir clustercockpit`
* `cd clustercockpit`
### Setup cc-backend
* Clone Repository
- `git clone https://github.com/ClusterCockpit/cc-backend.git`
- `cd cc-backend`
* Build
- `make`
* Activate & configure environment for cc-backend
- `cp configs/env-template.txt .env`
- Optional: Have a look via `vim .env`
- Copy the `config.json` file included in this tarball into the root directory of cc-backend: `cp ../../config.json ./`
* Back to toplevel `clustercockpit`
- `cd ..`
* Prepare Datafolder and Database file
- `mkdir var`
- `./cc-backend -migrate-db`
### Setup cc-metric-store
* Clone Repository
- `git clone https://github.com/ClusterCockpit/cc-metric-store.git`
- `cd cc-metric-store`
* Build Go Executable
- `go get`
- `go build`
* Prepare Datafolders
- `mkdir -p var/checkpoints`
- `mkdir -p var/archive`
* Update Config
- `vim config.json`
- Exchange existing setting in `metrics` with the following:
```
"clock": { "frequency": 60, "aggregation": null },
"cpi": { "frequency": 60, "aggregation": null },
"cpu_load": { "frequency": 60, "aggregation": null },
"flops_any": { "frequency": 60, "aggregation": null },
"flops_dp": { "frequency": 60, "aggregation": null },
"flops_sp": { "frequency": 60, "aggregation": null },
"ib_bw": { "frequency": 60, "aggregation": null },
"lustre_bw": { "frequency": 60, "aggregation": null },
"mem_bw": { "frequency": 60, "aggregation": null },
"mem_used": { "frequency": 60, "aggregation": null },
"rapl_power": { "frequency": 60, "aggregation": null }
```
* Back to toplevel `clustercockpit`
- `cd ..`
### Setup Demo Data
* `mkdir source-data`
* `cd source-data`
* Download JobArchive-Source:
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar.xz`
- `tar xJf job-archive-dev.tar.xz`
- `mv ./job-archive ./job-archive-source`
- `rm ./job-archive-dev.tar.xz`
* Download CC-Metric-Store Checkpoints:
- `mkdir -p cc-metric-store-source/checkpoints`
- `cd cc-metric-store-source/checkpoints`
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/cc-metric-store-checkpoints.tar.xz`
- `tar xf cc-metric-store-checkpoints.tar.xz`
- `rm cc-metric-store-checkpoints.tar.xz`
* Back to `source-data`
- `cd ../..`
* Run timestamp migration script. This may take tens of minutes!
- `cp ../migrateTimestamps.pl .`
- `./migrateTimestamps.pl`
- Expected output:
```
Starting to update start- and stoptimes in job-archive for emmy
Starting to update start- and stoptimes in job-archive for woody
Done for job-archive
Starting to update checkpoint filenames and data starttimes for emmy
Starting to update checkpoint filenames and data starttimes for woody
Done for checkpoints
```
* Copy `cluster.json` files from source to migrated folders
- `cp source-data/job-archive-source/emmy/cluster.json cc-backend/var/job-archive/emmy/`
- `cp source-data/job-archive-source/woody/cluster.json cc-backend/var/job-archive/woody/`
* Initialize Job-Archive in SQLite3 job.db and add demo user
- `cd cc-backend`
- `./cc-backend -init-db -add-user demo:admin:demo`
- Expected output:
```
<6>[INFO] new user "demo" created (roles: ["admin"], auth-source: 0)
<6>[INFO] Building job table...
<6>[INFO] A total of 3936 jobs have been registered in 1.791 seconds.
```
* Back to toplevel `clustercockpit`
- `cd ..`
### Startup both Apps
* In cc-backend root: `$./cc-backend -server -dev`
- Starts ClusterCockpit at `http://localhost:8080`
- Log: `<6>[INFO] HTTP server listening at :8080...`
- Use local internet browser to access interface
- You should see and be able to browse finished Jobs
- Metadata is read from SQLite3 database
- Metricdata is read from job-archive/JSON-Files
- Create User in settings (top-right corner)
- Name `apiuser`
- Username `apiuser`
- Role `API`
- Submit & Refresh Page
- Create JWT for `apiuser`
- In Userlist, press `Gen. JWT` for `apiuser`
- Save JWT for later use
* In cc-metric-store root: `$./cc-metric-store`
- Start the cc-metric-store on `http://localhost:8081`, Log:
```
2022/07/15 17:17:42 Loading checkpoints newer than 2022-07-13T17:17:42+02:00
2022/07/15 17:17:45 Checkpoints loaded (5621 files, 319 MB, that took 3.034652s)
2022/07/15 17:17:45 API http endpoint listening on '0.0.0.0:8081'
```
- Does *not* have a graphical interface
- Optional: Test function by executing:
```
$ curl -H "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" -D - "http://localhost:8081/api/query" -d "{ \"cluster\": \"emmy\", \"from\": $(expr $(date +%s) - 60), \"to\": $(date +%s), \"queries\": [{
\"metric\": \"flops_any\",
\"host\": \"e1111\"
}] }"
HTTP/1.1 200 OK
Content-Type: application/json
Date: Fri, 15 Jul 2022 13:57:22 GMT
Content-Length: 119
{"results":[[JSON-DATA-ARRAY]]}
```
### Development API web interfaces
The `-dev` flag enables web interfaces to document and test the apis:
* http://localhost:8080/playground - A GraphQL playground. To use it you must have an authenticated session in the same browser.
* http://localhost:8080/swagger - A Swagger UI. To use it you have to be logged out, so no user session in the same browser. Use the JWT token with role `api` generated previously to authenticate via HTTP header.
### Use cc-backend API to start job
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
* Click the `/job/start_job` endpoint and click the Try it out button.
* Enter the following json into the request body text area and fill in a recent start timestamp by executing `date +%s`:
```
{
"jobId": 100000,
"arrayJobId": 0,
"user": "ccdemouser",
"subCluster": "main",
"cluster": "emmy",
"startTime": <date +%s>,
"project": "ccdemoproject",
"resources": [
{"hostname": "e0601"},
{"hostname": "e0823"},
{"hostname": "e0337"},
{"hostname": "e1111"}],
"numNodes": 4,
"numHwthreads": 80,
"walltime": 86400
}
```
* The response body should be the database id of the started job, for example:
```
{
"id": 3937
}
```
* Check in ClusterCockpit
- User `ccdemouser` should appear in Users-Tab with one running job
- It could take up to 5 Minutes until the Job is displayed with some current data (5 Min Short-Job Filter)
- Job then is marked with a green `running` tag
- Metricdata displayed is read from cc-metric-store!
### Use cc-backend API to stop job
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
* Click the `/job/stop_job/{id}` endpoint and click the Try it out button.
* Enter the database id at id that was returned by `start_job` and copy the following into the request body. Replace the timestamp with a recent one:
```
{
"cluster": "emmy",
"jobState": "completed",
"stopTime": <RECENT TS>
}
```
* On success a json document with the job meta data is returned.
* Check in ClusterCockpit
- User `ccdemouser` should appear in Users-Tab with one completed job
- Job is no longer marked with a green `running` tag -> Completed!
- Metricdata displayed is now read from job-archive!
* Check in job-archive
- `cd ./cc-backend/var/job-archive/emmy/100/000`
- `cd $STARTTIME`
- Inspect `meta.json` and `data.json`
## Helper scripts
* In this tarball you can find the perl script `generate_subcluster.pl` that helps to generate the subcluster section for your system.
Usage:
* Log into an exclusive cluster node.
* The LIKWID tools likwid-topology and likwid-bench must be in the PATH!
* `$./generate_subcluster.pl` outputs the subcluster section on `stdout`
Please be aware that
* You have to enter the name and node list for the subCluster manually.
* GPU detection only works if LIKWID was built with CUDA available and you run likwid-topology also with CUDA loaded.
* Do not blindly trust the measured peakflops values.
* Because the script blindly relies on the CSV format output by likwid-topology this is a fragile undertaking!

99
docs/JWT-Handling.md Normal file
View File

@@ -0,0 +1,99 @@
## Introduction
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs. JSON
Web Token (JWT) is an open standard (RFC 7519) that defines a compact and
self-contained way for securely transmitting information between parties as a
JSON object. This information can be verified and trusted because it is
digitally signed. In ClusterCockpit JWTs are signed using a public/private key
pair using EdDSA (Ed25519). Because tokens are signed using public/private key pairs, the
signature also certifies that only the party holding the private key is the one
that signed it. Token expiration is set to the configuration option MaxAge.
## JWT Payload
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
Currently ClusterCockpit sets the following claims:
* `iat`: Issued at claim. The “iat” claim is used to identify the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
* `roles`: An array of strings specifying the roles set for the subject.
## Workflow
1. Create a new Ed25519 public/private keypair:
```
$ go build ./tools/gen-keypair.go
$ ./gen-keypair
```
2. Add keypair in your `.env` file. A template can be found in `./configs`.
There are two usage scenarios:
* The APIs are used during a browser session. API accesses are authorized with
the active session.
* The REST API is used outside a browser session, e.g. by scripts. In this case
you have to issue a token manually. This is possible from within the
configuration view or on the command line. It is recommended to issue a JWT
token in this case for a special user that only has the `api` role. By using
different users for different purposes a fine grained access control and
access revocation management is possible.
The token is commonly specified in the Authorization HTTP header using the Bearer schema.
## Setup user and JWT token for REST API authorization
1. Create user:
```
$ ./cc-backend --add-user <username>:api:<Password> --no-server
```
2. Issue token for user:
```
$ ./cc-backend -jwt <username> -no-server
```
3. Use issued token on client side:
```
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
```
## Accept externally generated JWTs provided via cookie
If there is an external service like an AuthAPI that can generate JWTs and hand
them over to ClusterCockpit via cookies, CC can be configured to accept them:
1. `.env`: CC needs a public ed25519 key to verify foreign JWT signatures.
Public keys in PEM format can be converted with the instructions in
[/tools/convert-pem-pubkey-for-cc](../tools/convert-pem-pubkey-for-cc/Readme.md)
.
```
CROSS_LOGIN_JWT_PUBLIC_KEY="+51iXX8BdLFocrppRxIw52xCOf8xFSH/eNilN5IHVGc="
```
2. `config.json`: Insert a name for the cookie (set by the external service)
containing the JWT so that CC knows where to look at. Define a trusted issuer
(JWT claim 'iss'), otherwise it will be rejected. If you want usernames and
user roles from JWTs ('sub' and 'roles' claim) to be validated against CC's
internal database, you need to enable it here. Unknown users will then be
rejected and roles set via JWT will be ignored.
```json
"jwts": {
"cookieName": "access_cc",
"validateUser": true,
"trustedIssuer": "auth.example.com"
}
```
3. Make sure your external service includes the same issuer (`iss`) in its JWTs.
Example JWT payload:
```json
{
"iat": 1668161471,
"nbf": 1668161471,
"exp": 1668161531,
"sub": "alice",
"roles": [
"user"
],
"jti": "a1b2c3d4-1234-5678-abcd-a1b2c3d4e5f6",
"iss": "auth.example.com"
}
```

78
docs/Job-Archive.md Normal file
View File

@@ -0,0 +1,78 @@
The job archive specifies an exchange format for job meta and performance metric
data. It consists of two parts:
* a [SQLite database schema](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#sqlite-database-schema) for job meta data and performance statistics
* a [Json file format](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#json-file-format) together with a [Directory hierarchy specification](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#directory-hierarchy-specification)
By using an open, portable and simple specification based on files it is
possible to exchange job performance data for research and analysis purposes as
well as use it as a robust way for archiving job performance data to disk.
# SQLite database schema
## Introduction
A SQLite 3 database schema is provided to standardize the job meta data
information in a portable way. The schema also includes optional columns for job
performance statistics (called a job performance footprint). The database acts
as a front end to filter and select subsets of job IDs, that are the keys to get
the full job performance data in the job performance tree hierarchy.
## Database schema
The schema includes 3 tables: the job table, a tag table and a jobtag table
representing the MANY-TO-MANY relation between jobs and tags. The SQL schema is
specified
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/schemas/jobs-sqlite.sql).
Explanation of the various columns including the JSON datatypes is documented
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/job-meta.schema.json).
# Directory hierarchy specification
## Specification
To manage the number of directories within a single directory a tree approach is
used splitting the integer job ID. The job ID is split in chunks of 1000 each.
Usually 2 layers of directories are sufficient but the concept can be used for an
arbitrary number of layers.
For a 2 layer schema this can be achieved with (code example in Perl):
``` perl
$level1 = $jobID/1000;
$level2 = $jobID%1000;
$dstPath = sprintf("%s/%s/%d/%03d", $trunk, $destdir, $level1, $level2);
```
## Example
For the job ID 1034871 the directory path is `./1034/871/`.
# Json file format
## Overview
Every cluster must be configured in a `cluster.json` file.
The job data consists of two files:
* `meta.json`: Contains job meta information and job statistics.
* `data.json`: Contains complete job data with time series
The description of the json format specification is available as a
[JSON schema](https://json-schema.org/) format file. The latest version of the json
schema is part of the `cc-backend` source tree. For external reference it is
also available in a separate repository.
## Specification `cluster.json`
The json schema specification is available
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/cluster.schema.json).
## Specification `meta.json`
The json schema specification is available
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-meta.schema.json).
## Specification `data.json`
The json schema specification is available
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-data.schema.json).
Metric time series data is stored for a fixed time step. The time step is set
per metric. If no value is available for a metric time series data timestamp
`null` is entered.

29
docs/adm-customization.md Normal file
View File

@@ -0,0 +1,29 @@
# Overview
Customizing `cc-backend` means changing the logo, legal texts, and the login
template instead of the placeholders. You can also place a text file in `./var`
to add dynamic status or notification messages to the ClusterCockpit homepage.
# Replace legal texts
To replace the `imprint.tmpl` and `privacy.tmpl` legal texts, you can place your
version in `./var/`. At startup `cc-backend` will check if `./var/imprint.tmpl` and/or
`./var/privacy.tmpl` exist and use them instead of the built-in placeholders.
You can use the placeholders in `web/templates` as a blueprint.
# Replace login template
To replace the default login layout and styling, you can place your version in
`./var/`. At startup `cc-backend` will check if `./var/login.tmpl` exists and use
it instead of the built-in placeholder. You can use the default template
`web/templates/login.tmpl` as a blueprint.
# Replace logo
To change the logo displayed in the navigation bar, you can provide the file
`logo.png` in the folder `./var/img/`. On startup `cc-backend` will check if the
folder exists and use the images provided there instead of the built-in images.
You may also place additional images there you use in a custom login template.
# Add notification banner on homepage
To add a notification banner you can add a file `notice.txt` to `./var`. As long
as this file is present all text in this file is shown in an info banner on the
homepage.

78
docs/adm-upgrade.md Normal file
View File

@@ -0,0 +1,78 @@
In general, an upgrade is nothing more than a replacement of the binary file.
All the necessary files, except the database file, the configuration file and
the job archive, are embedded in the binary file. It is recommended to use a
directory where the file names of the binary files are named with a version
indicator. This can be, for example, the date or the Unix epoch time. A symbolic
link points to the version to be used. This makes it easier to switch to earlier
versions.
The database and the job archive are versioned. Each release binary supports
specific versions of the database and job archive. If a version mismatch is
detected, the application is terminated and migration is required.
**IMPORTANT NOTE**
It is recommended to make a backup copy of the database before each update. This
is mandatory in case the database needs to be migrated. In the case of sqlite,
this means stopping `cc-backend` and copying the sqlite database file
somewhere.
# Migrating the database
After you have backed up the database, run the following command to migrate the
database to the latest version:
```
$ ./cc-backend -migrate-db
```
The migration files are embedded in the binary and can also be viewed in the
cc-backend [source tree](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository/migrations).
There are separate migration files for both supported
database backends.
We use the [migrate library](https://github.com/golang-migrate/migrate).
If something goes wrong, you can check the status and get the current schema
(here for sqlite):
```
$ sqlite3 var/job.db
```
In the sqlite console execute:
```
.schema
```
to get the current database schema.
You can query the current version and whether the migration failed with:
```
SELECT * FROM schema_migrations;
```
The first column indicates the current database version and the second column is
a dirty flag indicating whether the migration was successful.
# Migrating the job archive
Job archive migration requires a separate tool (`archive-migration`), which is
part of the cc-backend source tree (build with `go build ./tools/archive-migration`)
and is also provided as part of the releases.
Migration is supported only between two successive releases. The migration tool
migrates the existing job archive to a new job archive. This means that there
must be enough disk space for two complete job archives. If the tool is called
without options:
```
$ ./archive-migration
```
it is assumed that a job archive exists in `./var/job-archive`. The new job
archive is written to `./var/job-archive-new`. Since execution is threaded, in case
of a fatal error it is impossible to determine in which job the error occurred.
In this case, you can run the tool in debug mode (with the `-debug` flag). In
debug mode, threading is disabled and the job ID of each migrated job is output.
Jobs with empty files will be skipped. Between multiple runs of the tools, the
`job-archive-new` directory must be moved or deleted.
The `cluster.json` files in `job-archive-new` must be checked for errors, especially
whether the aggregation attribute is set correctly for all metrics.
Migration takes several hours for relatively large job archives (several hundred
GB). A versioned job archive contains a version.txt file in the root directory
of the job archive. This file contains the version as an unsigned integer.

180
docs/dev-authentication.md Normal file
View File

@@ -0,0 +1,180 @@
# Overview
The authentication is implemented in `internal/auth/`. In `auth.go`
an interface is defined that any authentication provider must fulfill. It also
acts as a dispatcher to delegate the calls to the available authentication
providers.
Two authentication types are available:
* JWT authentication for the REST API that does not create a session cookie
* Session based authentication using a session cookie
The most important routines in auth are:
* `Login()` Handle POST request to login user and start a new session
* `Auth()` Authenticate user and put User Object in context of the request
The http router calls auth in the following cases:
* `r.Handle("/login", authentication.Login( ... )).Methods(http.MethodPost)`:
The POST request on the `/login` route will call the Login callback.
* `r.Handle("/jwt-login", authentication.Login( ... ))`:
Any request on the `/jwt-login` route will call the Login callback. Intended
for use for the JWT token based authenticators.
* Any route in the secured subrouter will always call Auth(), on success it will
call the next handler in the chain, on failure it will render the login
template.
```
secured.Use(func(next http.Handler) http.Handler {
return authentication.Auth(
// On success;
next,
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
// Render login form
})
})
```
A JWT token can be used to initiate an authenticated user
session. This can either happen by calling the login route with a token
provided in a header or via a special cookie containing the JWT token.
For API routes the access is authenticated on every request using the JWT token
and no session is initiated.
# Login
The Login function (located in `auth.go`):
* Extracts the user name and gets the user from the user database table. In case the
user is not found the user object is set to nil.
* Iterates over all authenticators and:
- Calls its `CanLogin` function which checks if the authentication method is
supported for this user.
- Calls its `Login` function to authenticate the user. On success a valid user
object is returned.
- Creates a new session object, stores the user attributes in the session and
saves the session.
- Starts the `onSuccess` http handler
## Local authenticator
This authenticator is applied if
```
return user != nil && user.AuthSource == AuthViaLocalPassword
```
Compares the password provided by the login form to the password hash stored in
the user database table:
```
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil {
log.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username)
return nil, fmt.Errorf("Authentication failed")
}
```
## LDAP authenticator
This authenticator is applied if the user was found in the database and its
AuthSource is LDAP:
```
if user != nil {
if user.AuthSource == schema.AuthViaLDAP {
return user, true
}
}
```
If the option `SyncUserOnLogin` is set it tries to sync the user from the LDAP
directory. In case this succeeds the user is persisted to the database and can
login.
Gets the LDAP connection and tries a bind with the provided credentials:
```
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
log.Errorf("AUTH/LDAP > Authentication for user %s failed: %v", user.Username, err)
return nil, fmt.Errorf("Authentication failed")
}
```
## JWT Session authenticator
Login via JWT token will create a session without password.
For login the `X-Auth-Token` header is not supported. This authenticator is
applied if the Authorization header or query parameter login-token is present:
```
return user, r.Header.Get("Authorization") != "" ||
r.URL.Query().Get("login-token") != ""
```
The Login function:
* Parses the token and checks if it is expired
* Check if the signing method is EdDSA or HS256 or HS512
* Check if claims are valid and extracts the claims
* The following claims have to be present:
- `sub`: The subject, in this case this is the username
- `exp`: Expiration in Unix epoch time
- `roles`: String array with roles of user
* In case user does not exist in the database and the option `SyncUserOnLogin`
is set add user to user database table with `AuthViaToken` AuthSource.
* Return valid user object
## JWT Cookie Session authenticator
Login via JWT cookie token will create a session without password.
It is first checked if the required configuration options are set:
* `trustedIssuer`
* `CookieName`
and optionally the environment variable `CROSS_LOGIN_JWT_PUBLIC_KEY` is set.
This authenticator is applied if the configured cookie is present:
```
jwtCookie, err := r.Cookie(cookieName)
if err == nil && jwtCookie.Value != "" {
return true
}
```
The Login function:
* Extracts and parses the token
* Checks if signing method is Ed25519/EdDSA
* In case publicKeyCrossLogin is configured:
- Check if `iss` issuer claim matches trusted issuer from configuration
- Return public cross login key
- Otherwise return standard public key
* Check if claims are valid
* Depending on the option `validateUser` the roles are
extracted from JWT token or taken from user object fetched from database
* Ask browser to delete the JWT cookie
* In case user does not exist in the database and the option `SyncUserOnLogin`
is set add user to user database table with `AuthViaToken` AuthSource.
* Return valid user object
# Auth
The Auth function (located in `auth.go`):
* Returns a new http handler function that is defined right away
* This handler tries two methods to authenticate a user:
- Via a JWT API token in `AuthViaJWT()`
- Via a valid session in `AuthViaSession()`
* If err is nil and the user object is valid it puts the user object in the
request context and starts the onSuccess http handler
* Otherwise it calls the onFailure handler
## AuthViaJWT
Implemented in JWTAuthenticator:
* Extract token either from header `X-Auth-Token` or `Authorization` with Bearer
prefix
* Parse token and check if it is valid. The Parse routine will also check if the
token is expired.
* If the option `validateUser` is set it will ensure the
user object exists in the database and takes the roles from the database user
* Otherwise the roles are extracted from the roles claim
* Returns a valid user object with AuthType set to AuthToken
## AuthViaSession
* Extracts session
* Get values username, projects, and roles from session
* Returns a valid user object with AuthType set to AuthSession

33
docs/dev-frontend.md Normal file
View File

@@ -0,0 +1,33 @@
## Tips for frontend development
The frontend assets including the Svelte js files are per default embedded in
the Go binary. To enable a quick turnaround cycle for web development of the
frontend disable embedding of static assets in `config.json`:
```
"embed-static-files": false,
"static-files": "./web/frontend/public/",
```
Start the node build process (in directory `./web/frontend`) in development mode:
```
$ npm run dev
```
This will start the build process in listen mode. Whenever you change a source
file, the dependent JavaScript targets will be automatically rebuilt.
In case the javascript files are minified you may need to set the production
flag by hand to false in `./web/frontend/rollup.config.mjs`:
```
const production = false
```
Usually this should work automatically.
Because the files are still served by ./cc-backend you have to reload the view
explicitly in your browser.
A common setup is to have three terminals open:
* One running cc-backend (working directory repository root): `./cc-backend -server -dev`
* Another running npm in developer mode (working directory `./web/frontend`): `npm run dev`
* And the last one editing the frontend source files

13
docs/dev-release.md Normal file
View File

@@ -0,0 +1,13 @@
# Steps to prepare a release
1. On `hotfix` branch:
* Update ReleaseNotes.md
* Update version in Makefile
* Commit, push, and pull request
* Merge into master
2. On Linux host:
* Pull master
* Ensure that GitHub Token environment variable `GITHUB_TOKEN` is set
* Create release tag: `git tag v1.1.0 -m release`
* Execute `goreleaser release`

34
docs/dev-testing.md Normal file
View File

@@ -0,0 +1,34 @@
## Overview
We use the standard golang testing environment.
The following conventions are used:
* *White box unit tests*: Tests for internal functionality are placed in files
named `<file name>_test.go` that belong to the same package as the code under test.
* *Black box unit tests*: Tests for public interfaces are placed in files
with `<package name>_test.go` and belong to the package `<package_name>_test`.
There only exists one package test file per package.
* *Integration tests*: Tests that use multiple components are placed in a
package test file. These are named `<package name>_test.go` and belong to the
package `<package_name>_test`.
* *Test assets*: Any required files are placed in a directory `./testdata`
within each package directory.
## Executing tests
Visual Studio Code has a very good golang test integration.
For debugging a test this is the recommended solution.
The Makefile provided by us has a `test` target that executes:
```
$ go clean -testcache
$ go build ./...
$ go vet ./...
$ go test ./...
```
Of course the commands can also be used on the command line.
For details about golang testing refer to the standard documentation:
* [Testing package](https://pkg.go.dev/testing)
* [go test command](https://pkg.go.dev/cmd/go#hdr-Test_packages)

229
docs/migrateTimestamps.pl Executable file
View File

@@ -0,0 +1,229 @@
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use JSON::PP; # from Perl default install
use Time::Local qw( timelocal ); # from Perl default install
use Time::Piece; # from Perl default install
### JSON
# Shared JSON encoder/decoder used by both migration sections (allow_nonref enabled).
my $json = JSON::PP->new->allow_nonref;
### TIME AND DATE
# Current time, taken once and reused as epoch seconds.
my $now = localtime;
my $epochtime = $now->epoch;
# Five days back, computed on the epoch value so that crossing a month
# boundary needs no special handling.
my $fiveDaysAgo = localtime($epochtime - (5 * 86400));
# Local midnight of that day, comparable to
# `date --date 'TZ="Europe/Berlin" 0:00 5 days ago' +%s`.
# _mon is the zero-based month, which is the form timelocal expects.
my $checkpointStart = timelocal(
    0, 0, 0,
    $fiveDaysAgo->mday, $fiveDaysAgo->_mon, $fiveDaysAgo->year
);
# Half a day in seconds: spacing between two consecutive checkpoint files.
my $halfday = 43200;
### JOB-ARCHIVE
# Copies every job found below $archiveSrc into $archiveTarget and rewrites
# startTime/stopTime in meta.json so that the jobs appear to be recent.
# Source layout: <cluster>/<level1>/<level2>/[<subfolder>/]{meta.json,data.json}
# Target layout: <cluster>/<level1>/<level2>/<new startTime>/{meta.json,data.json}
my $archiveTarget = './cc-backend/var/job-archive';
my $archiveSrc = './source-data/job-archive-source';
my @ArchiveClusters;
# Gen folder
if ( not -d $archiveTarget ){
    mkdir( $archiveTarget ) or die "Couldn't create $archiveTarget directory, $!";
}
# Get clusters by job-archive/$subfolder
opendir my $dh, $archiveSrc or die "can't open directory: $!";
while ( defined( my $entry = readdir $dh ) ) {
    next if $entry eq '.' or $entry eq '..' or $entry eq 'job-archive';
    push @ArchiveClusters, $entry;
}
closedir($dh);
# start for jobarchive
foreach my $cluster ( @ArchiveClusters ) {
    print "Starting to update start- and stoptimes in job-archive for $cluster\n";
    my $clusterTarget = "$archiveTarget/$cluster";
    if ( not -d $clusterTarget ){
        mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
    }
    opendir my $dhLevel1, "$archiveSrc/$cluster" or die "can't open directory: $!";
    while ( defined( my $level1 = readdir $dhLevel1 ) ) {
        next if $level1 eq '.' or $level1 eq '..';
        # Only directories can contain jobs; plain files on this level are ignored.
        next unless -d "$archiveSrc/$cluster/$level1";
        opendir my $dhLevel2, "$archiveSrc/$cluster/$level1" or die "can't open directory: $!";
        while ( defined( my $level2 = readdir $dhLevel2 ) ) {
            next if $level2 eq '.' or $level2 eq '..';
            my $jobSource = "$archiveSrc/$cluster/$level1/$level2";
            my $jobTargetL1 = "$clusterTarget/$level1";
            my $jobTargetL2 = "$jobTargetL1/$level2";
            # check if files are directly accessible (old format) else look one level deeper
            if ( ! -e "$jobSource/meta.json" ) {
                opendir my $dhJob, $jobSource or die "Can't open directory $jobSource: $!\n";
                my @entries = grep { $_ ne '.' and $_ ne '..' } readdir $dhJob;
                closedir($dhJob);
                # Use the first subfolder (in sorted order) that holds a meta.json.
                # Appending every entry to the path, as done before, produced a
                # non-existent path as soon as more than one entry was present.
                my ($withMeta) = grep { -e "$jobSource/$_/meta.json" } sort @entries;
                $jobSource = "$jobSource/$withMeta" if defined $withMeta;
            }
            # check if subfolder contains file, else skip
            if ( ! -e "$jobSource/meta.json" ) {
                print "$jobSource skipped\n";
                next;
            }
            open my $metafh, '<', "$jobSource/meta.json" or die "Can't open file $!";
            my $rawstr = do { local $/; <$metafh> };
            close($metafh);
            my $metadata = $json->decode($rawstr);
            # Set new startTime: now minus a random offset of 86400..518399 seconds,
            # i.e. between 1 day and just under 6 days before now.
            # NOTE(review): the previous comment said "between 5 days and 1 day";
            # confirm whether the offset range or the comment was intended.
            $metadata->{startTime} = $epochtime - (int(rand(432000)) + 86400);
            $metadata->{stopTime} = $metadata->{startTime} + $metadata->{duration};
            # Add starttime subfolder to target path
            my $jobTargetL3 = "$jobTargetL2/".$metadata->{startTime};
            foreach my $dir ( $jobTargetL1, $jobTargetL2 ) {
                if ( not -d $dir ){
                    mkdir( $dir ) or die "Couldn't create $dir directory, $!";
                }
            }
            # Only write when the target folder does not exist yet; an existing
            # folder with the same startTime is left untouched.
            if ( not -d $jobTargetL3 ){
                mkdir( $jobTargetL3 ) or die "Couldn't create $jobTargetL3 directory, $!";
                my $outstr = $json->encode($metadata);
                open my $metaout, '>', "$jobTargetL3/meta.json" or die "Can't write to file $!";
                print $metaout $outstr;
                # Buffered write errors only surface on close.
                close($metaout) or die "Can't write to file $!";
                # data.json is copied unchanged.
                open my $datafh, '<', "$jobSource/data.json" or die "Can't open file $!";
                my $datastr = do { local $/; <$datafh> };
                close($datafh);
                open my $dataout, '>', "$jobTargetL3/data.json" or die "Can't write to file $!";
                print $dataout $datastr;
                close($dataout) or die "Can't write to file $!";
            }
        }
        closedir($dhLevel2);
    }
    closedir($dhLevel1);
}
print "Done for job-archive\n";
sleep(1);
# NOTE(review): this exit ends the script here, so the CHECKPOINTS section
# that follows never runs. Confirm that this is intentional.
exit;
## CHECKPOINTS
# Copies the checkpoint files of every node below $checkpSource into
# $checkpTarget, renaming them to consecutive half-day timestamps beginning at
# $checkpointStart and shifting the contained start times by the same amount.
my $checkpTarget = './cc-metric-store/var/checkpoints';
my $checkpSource = './source-data/cc-metric-store-source/checkpoints';
my @CheckpClusters;
# Gen folder
if ( not -d $checkpTarget ){
    mkdir( $checkpTarget ) or die "Couldn't create $checkpTarget directory, $!";
}
# Get clusters by cc-metric-store/$subfolder
opendir my $dhc, $checkpSource or die "can't open directory: $!";
while ( defined( my $entry = readdir $dhc ) ) {
    next if $entry eq '.' or $entry eq '..' or $entry eq 'job-archive';
    push @CheckpClusters, $entry;
}
closedir($dhc);
# start for checkpoints
foreach my $cluster ( @CheckpClusters ) {
    print "Starting to update checkpoint filenames and data starttimes for $cluster\n";
    my $clusterTarget = "$checkpTarget/$cluster";
    if ( not -d $clusterTarget ){
        mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
    }
    opendir my $dhLevel1, "$checkpSource/$cluster" or die "can't open directory: $!";
    while ( defined( my $level1 = readdir $dhLevel1 ) ) {
        next if $level1 eq '.' or $level1 eq '..';
        # Nodename as level1-folder
        next unless -d "$checkpSource/$cluster/$level1";
        my $nodeSource = "$checkpSource/$cluster/$level1/";
        my $nodeTarget = "$clusterTarget/$level1";
        # 1609459200 == First Checkpoint time in latest dump; nodes without it are skipped
        next unless -e "$nodeSource/1609459200.json";
        opendir my $dhNode, $nodeSource or die "Can't open directory $nodeSource: $!\n";
        my @files = grep { $_ ne '.' and $_ ne '..' } readdir $dhNode;
        closedir($dhNode);
        # needs 14 files == 7 days worth of data
        next if @files != 14;
        # Sort numerically by the 10-digit timestamp part of the filename, so that
        # index 0 is 1609459200.json. The dot is escaped so that only a literal
        # ".json" suffix matches.
        my @sortedFiles = sort {
            ($a =~ /^([0-9]{10})\.json$/)[0] <=> ($b =~ /^([0-9]{10})\.json$/)[0]
        } @files;
        # Only convert nodes that have no target folder yet.
        if ( not -d $nodeTarget ){
            mkdir( $nodeTarget ) or die "Couldn't create $nodeTarget directory, $!";
            foreach my $index ( 0 .. $#sortedFiles ) {
                my $file = $sortedFiles[$index];
                open my $checkfh, '<', "$nodeSource/$file" or die "Can't open file $!";
                my $rawstr = do { local $/; <$checkfh> };
                close($checkfh);
                my $checkpdata = $json->decode($rawstr);
                # One checkpoint file per half day, counted from $checkpointStart.
                my $newTimestamp = $checkpointStart + ($index * $halfday);
                # Get Diff from old Timestamp
                my $timeDiff = $newTimestamp - $checkpdata->{from};
                # Set new timestamp
                $checkpdata->{from} = $newTimestamp;
                # Shift the start of every metric by the same difference.
                foreach my $metric (keys %{$checkpdata->{metrics}}) {
                    $checkpdata->{metrics}->{$metric}->{start} += $timeDiff;
                }
                my $outstr = $json->encode($checkpdata);
                open my $checkout, '>', "$nodeTarget/$newTimestamp.json" or die "Can't write to file $!";
                print $checkout $outstr;
                # Buffered write errors only surface on close.
                close($checkout) or die "Can't write to file $!";
            }
        }
    }
    closedir($dhLevel1);
}
print "Done for checkpoints\n";

36
docs/searchbar.md Normal file
View File

@@ -0,0 +1,36 @@
# Docs for ClusterCockpit Searchbar
## Usage
* Searchtags are implemented as `type:<query>` search-string
* Types `jobId, jobName, projectId, username, name, arrayJobId` for roles `admin` and `support`
* `jobName` is jobName as persisted in `job.meta_data` table-column
* `username` is actual account identifier as persisted in `job.user` table-column
* `name` is account owners name as persisted in `user.name` table-column
* Types `jobId, jobName, projectId, arrayJobId` for role `user`
* Examples:
* `jobName:myJob12`
* `jobId:123456`
* `username:abcd100`
* `name:Paul`
* If no searchTag is used: Best guess search with the following hierarchy
* `jobId -> username -> name -> projectId -> jobName`
* Destinations:
* JobId: Job-Table (Allows multiple identical matches, e.g. JobIds from different clusters)
* JobName: Job-Table (Allows multiple identical matches, e.g. JobNames from different clusters)
* ProjectId: Job-Table
* Username: Users-Table
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table. Also, a `Last 30 Days` filter is active by default and might filter out expected users.
* Name: Users-Table
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table. Also, a `Last 30 Days` filter is active by default and might filter out expected users.
* ArrayJobId: Job-Table (Lists all Jobs of Queried ArrayJobId)
* Best guess search always redirects to Job-Table or `/monitoring/user/$USER` (first username match)
* Unprocessable queries will display messages detailing the cause (Info, Warning, Error)
* Spaces trimmed (both for searchTag and queryString)
* ` job12` == `job12`
* `projectID : abcd ` == `projectId:abcd`
* `jobName`- and `name`-queries work with a part of the target-string
* `jobName:myjob` for jobName "myjob_cluster1"
* `name:Paul` for name "Paul Atreides"
* JobName GQL Query is resolved as matching the query as a part of the whole metaData-JSON in the SQL DB.

130
go.mod
View File

@@ -1,91 +1,97 @@
module github.com/ClusterCockpit/cc-backend
go 1.23.5
toolchain go1.24.1
go 1.18
require (
github.com/99designs/gqlgen v0.17.66
github.com/99designs/gqlgen v0.17.40
github.com/ClusterCockpit/cc-units v0.4.0
github.com/Masterminds/squirrel v1.5.4
github.com/coreos/go-oidc/v3 v3.12.0
github.com/go-co-op/gocron/v2 v2.16.0
github.com/go-ldap/ldap/v3 v3.4.10
github.com/go-sql-driver/mysql v1.9.0
github.com/golang-jwt/jwt/v5 v5.2.2
github.com/golang-migrate/migrate/v4 v4.18.2
github.com/google/gops v0.3.28
github.com/gorilla/handlers v1.5.2
github.com/gorilla/mux v1.8.1
github.com/gorilla/sessions v1.4.0
github.com/influxdata/influxdb-client-go/v2 v2.14.0
github.com/jmoiron/sqlx v1.4.0
github.com/mattn/go-sqlite3 v1.14.24
github.com/prometheus/client_golang v1.21.0
github.com/prometheus/common v0.62.0
github.com/Masterminds/squirrel v1.5.3
github.com/go-co-op/gocron v1.25.0
github.com/go-ldap/ldap/v3 v3.4.4
github.com/go-sql-driver/mysql v1.7.0
github.com/golang-jwt/jwt/v4 v4.5.0
github.com/golang-migrate/migrate/v4 v4.15.2
github.com/google/gops v0.3.27
github.com/gorilla/handlers v1.5.1
github.com/gorilla/mux v1.8.0
github.com/gorilla/sessions v1.2.1
github.com/influxdata/influxdb-client-go/v2 v2.12.2
github.com/jmoiron/sqlx v1.3.5
github.com/mattn/go-sqlite3 v1.14.16
github.com/minio/minio-go/v7 v7.0.63
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/common v0.40.0
github.com/qustavo/sqlhooks/v2 v2.1.0
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.16.4
github.com/vektah/gqlparser/v2 v2.5.22
golang.org/x/crypto v0.36.0
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa
golang.org/x/oauth2 v0.27.0
golang.org/x/time v0.5.0
github.com/santhosh-tekuri/jsonschema/v5 v5.2.0
github.com/swaggo/http-swagger v1.3.3
github.com/swaggo/swag v1.16.2
github.com/vektah/gqlparser/v2 v2.5.10
golang.org/x/crypto v0.16.0
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea
)
require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
github.com/KyleBanks/depth v1.2.1 // indirect
github.com/agnivade/levenshtein v1.2.1 // indirect
github.com/agnivade/levenshtein v1.1.1 // indirect
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
github.com/go-jose/go-jose/v4 v4.0.5 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/securecookie v1.1.2 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/containerd/containerd v1.6.18 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect
github.com/deepmap/oapi-codegen v1.12.4 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
github.com/go-openapi/jsonpointer v0.20.0 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/spec v0.20.9 // indirect
github.com/go-openapi/swag v0.22.4 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/uuid v1.4.0 // indirect
github.com/gorilla/securecookie v1.1.1 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.3 // indirect
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
github.com/jonboulle/clockwork v0.5.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.16.7 // indirect
github.com/klauspost/cpuid/v2 v2.2.5 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/minio/sha256-simd v1.0.1 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/oapi-codegen/runtime v1.1.1 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/procfs v0.9.0 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/rs/xid v1.5.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sosodev/duration v1.3.1 // indirect
github.com/swaggo/files v1.0.1 // indirect
github.com/urfave/cli/v2 v2.27.5 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
go.uber.org/atomic v1.11.0 // indirect
golang.org/x/mod v0.23.0 // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/sync v0.12.0 // indirect
golang.org/x/sys v0.31.0 // indirect
golang.org/x/text v0.23.0 // indirect
golang.org/x/tools v0.30.0 // indirect
google.golang.org/protobuf v1.36.5 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/sosodev/duration v1.2.0 // indirect
github.com/swaggo/files v1.0.0 // indirect
github.com/urfave/cli/v2 v2.25.7 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
go.uber.org/atomic v1.10.0 // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/net v0.19.0 // indirect
golang.org/x/oauth2 v0.5.0 // indirect
golang.org/x/sys v0.15.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/tools v0.16.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect

2122
go.sum

File diff suppressed because it is too large Load Diff

View File

@@ -30,7 +30,6 @@ resolver:
# gqlgen will search for any type names in the schema in these go packages
# if they match it will use them, otherwise it will generate them.
autobind:
- "github.com/99designs/gqlgen/graphql/introspection"
- "github.com/ClusterCockpit/cc-backend/internal/graph/model"
# This section declares type mapping between the GraphQL and go type systems
@@ -62,50 +61,23 @@ models:
fields:
partitions:
resolver: true
NullableFloat:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
MetricScope:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
MetricValue:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
JobStatistics:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
GlobalMetricListItem:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.GlobalMetricListItem",
}
ClusterSupport:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.ClusterSupport" }
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
Resource:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
JobState:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
TimeRange:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
IntRange:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
JobMetric:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
MetricStatistics:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics",
}
MetricConfig:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
SubClusterConfig:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig",
}
Accelerator:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
Topology:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
FilterRanges:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
SubCluster:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
StatsSeries:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" }
MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" }
Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }

View File

@@ -1,5 +1,5 @@
[Unit]
Description=ClusterCockpit Web Server
Description=ClusterCockpit Web Server (Go edition)
Documentation=https://github.com/ClusterCockpit/cc-backend
Wants=network-online.target
After=network-online.target

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -14,16 +14,13 @@ import (
"os"
"path/filepath"
"reflect"
"strconv"
"strings"
"testing"
"time"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -45,9 +42,6 @@ func setup(t *testing.T) *api.RestApi {
"jwts": {
"max-age": "2m"
},
"apiAllowedIPs": [
"*"
],
"clusters": [
{
"name": "testcluster",
@@ -123,7 +117,7 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
t.Fatal(err)
}
@@ -150,20 +144,23 @@ func setup(t *testing.T) *api.RestApi {
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
repository.Connect("sqlite3", dbfilepath)
db := repository.GetConnection()
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
t.Fatal(err)
}
if err := metricdata.Init(); err != nil {
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
t.Fatal(err)
}
archiver.Start(repository.GetJobRepository())
auth.Init()
graph.Init()
jobRepo := repository.GetJobRepository()
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
return api.New()
return &api.RestApi{
JobRepository: resolver.Repo,
Resolver: resolver,
}
}
func cleanup() {
@@ -178,6 +175,7 @@ func cleanup() {
func TestRestApi(t *testing.T) {
restapi := setup(t)
t.Cleanup(cleanup)
testData := schema.JobData{
"load_one": map[schema.MetricScope]*schema.JobMetric{
schema.MetricScopeNode: {
@@ -194,18 +192,12 @@ func TestRestApi(t *testing.T) {
},
}
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
return testData, nil
}
r := mux.NewRouter()
r.PathPrefix("/api").Subrouter()
r.StrictSlash(true)
restapi.MountApiRoutes(r)
var TestJobId int64 = 123
var TestClusterName string = "testcluster"
var TestStartTime int64 = 123456789
restapi.MountRoutes(r)
const startJobBody string = `{
"jobId": 123,
@@ -221,7 +213,7 @@ func TestRestApi(t *testing.T) {
"exclusive": 1,
"monitoringStatus": 1,
"smt": 1,
"tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
"tags": [{ "type": "testTagType", "name": "testTagName" }],
"resources": [
{
"hostname": "host123",
@@ -232,33 +224,28 @@ func TestRestApi(t *testing.T) {
"startTime": 123456789
}`
const contextUserKey repository.ContextKey = "user"
contextUserValue := &schema.User{
Username: "testuser",
Projects: make([]string, 0),
Roles: []string{"user"},
AuthType: 0,
AuthSource: 2,
}
var dbid int64
if ok := t.Run("StartJob", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
recorder := httptest.NewRecorder()
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
r.ServeHTTP(recorder, req)
response := recorder.Result()
if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String())
}
resolver := graph.GetResolverInstance()
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
var res api.StartJobApiResponse
if err := json.Unmarshal(recorder.Body.Bytes(), &res); err != nil {
t.Fatal(err)
}
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(res.DBID)))
if err != nil {
t.Fatal(err)
}
job.Tags, err = resolver.Job().Tags(ctx, job)
job.Tags, err = restapi.Resolver.Job().Tags(context.Background(), job)
if err != nil {
t.Fatal(err)
}
@@ -282,9 +269,11 @@ func TestRestApi(t *testing.T) {
t.Fatalf("unexpected job properties: %#v", job)
}
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" {
t.Fatalf("unexpected tags: %#v", job.Tags)
}
dbid = res.DBID
}); !ok {
return
}
@@ -300,19 +289,17 @@ func TestRestApi(t *testing.T) {
var stoppedJob *schema.Job
if ok := t.Run("StopJob", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
recorder := httptest.NewRecorder()
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
r.ServeHTTP(recorder, req)
response := recorder.Result()
if response.StatusCode != http.StatusOK {
t.Fatal(response.Status, recorder.Body.String())
}
archiver.WaitForArchiving()
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
restapi.JobRepository.WaitForArchiving()
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(dbid)))
if err != nil {
t.Fatal(err)
}
@@ -340,7 +327,7 @@ func TestRestApi(t *testing.T) {
}
t.Run("CheckArchive", func(t *testing.T) {
data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
if err != nil {
t.Fatal(err)
}
@@ -354,12 +341,10 @@ func TestRestApi(t *testing.T) {
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far apart!
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(body)))
recorder := httptest.NewRecorder()
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
r.ServeHTTP(recorder, req)
response := recorder.Result()
if response.StatusCode != http.StatusUnprocessableEntity {
t.Fatal(response.Status, recorder.Body.String())
@@ -386,12 +371,10 @@ func TestRestApi(t *testing.T) {
}`
ok := t.Run("StartJobFailed", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
recorder := httptest.NewRecorder()
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
r.ServeHTTP(recorder, req)
response := recorder.Result()
if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String())
@@ -401,10 +384,8 @@ func TestRestApi(t *testing.T) {
t.Fatal("subtest failed")
}
time.Sleep(1 * time.Second)
const stopJobBodyFailed string = `{
"jobId": 12345,
"jobId": 12345,
"cluster": "testcluster",
"jobState": "failed",
@@ -412,18 +393,16 @@ func TestRestApi(t *testing.T) {
}`
ok = t.Run("StopJobFailed", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
recorder := httptest.NewRecorder()
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
r.ServeHTTP(recorder, req)
response := recorder.Result()
if response.StatusCode != http.StatusOK {
t.Fatal(response.Status, recorder.Body.String())
}
archiver.WaitForArchiving()
restapi.JobRepository.WaitForArchiving()
jobid, cluster := int64(12345), "testcluster"
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
if err != nil {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,94 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver
import (
"context"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
var (
archivePending sync.WaitGroup
archiveChannel chan *schema.Job
jobRepo *repository.JobRepository
)
// Start prepares the archiver for use: it remembers the job repository
// handle, creates the buffered job queue (capacity 128) and launches the
// background archiving worker goroutine.
func Start(r *repository.JobRepository) {
	jobRepo = r
	archiveChannel = make(chan *schema.Job, 128)

	go archivingWorker()
}
// Archiving worker thread
func archivingWorker() {
for {
select {
case job, ok := <-archiveChannel:
if !ok {
break
}
start := time.Now()
// not using meta data, called to load JobMeta into Cache?
// will fail if job meta not in repository
if _, err := jobRepo.FetchMetadata(job); err != nil {
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
// TODO: Maybe use context with cancel/timeout here
jobMeta, err := ArchiveJob(job, context.Background())
if err != nil {
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
stmt := sq.Update("job").Where("job.id = ?", job.ID)
if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
log.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
continue
}
if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
log.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
continue
}
// Update the jobs database entry one last time:
stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
if err := jobRepo.Execute(stmt); err != nil {
log.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
continue
}
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
log.Printf("archiving job (dbid: %d) successful", job.ID)
archivePending.Done()
}
}
}
// TriggerArchiving queues job for asynchronous archiving by the worker
// goroutine. The pending counter is incremented before the job is sent, so a
// later WaitForArchiving call accounts for it. The send blocks when the
// channel buffer is full.
func TriggerArchiving(job *schema.Job) {
// A nil channel means no queue has been created yet; this is treated as fatal.
if archiveChannel == nil {
log.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
}
archivePending.Add(1)
archiveChannel <- job
}
// WaitForArchiving blocks until the archivePending wait group counter has
// dropped back to zero.
func WaitForArchiving() {
// NOTE: the channel is not closed here; this only waits on the pending counter.
archivePending.Wait()
}

View File

@@ -1,83 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver
import (
"context"
"math"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// ArchiveJob loads the metric data of job, derives per-metric job statistics
// from the node scope series and, unless the file based archive is disabled,
// imports the job into the archive backend.
//
// The returned JobMeta carries the computed statistics. An error is returned
// when loading the metric data fails or when the archive import fails; in the
// latter case the JobMeta is still returned alongside the error.
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
	// Request every metric configured for the job's cluster.
	metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
	allMetrics := make([]string, 0, len(metricConfigs))
	for _, mc := range metricConfigs {
		allMetrics = append(allMetrics, mc.Name)
	}

	// Node scope is always archived; finer scopes only for small jobs or
	// jobs that use accelerators.
	scopes := []schema.MetricScope{schema.MetricScopeNode}
	// FIXME: Add a config option for this
	if job.NumNodes <= 8 {
		// This will add the native scope if core scope is not available
		scopes = append(scopes, schema.MetricScopeCore)
	}
	if job.NumAcc > 0 {
		scopes = append(scopes, schema.MetricScopeAccelerator)
	}

	// 0 Resolution-Value retrieves highest res (60s)
	jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0)
	if err != nil {
		log.Error("Error while loading job data for archiving")
		return nil, err
	}

	jobMeta := &schema.JobMeta{
		BaseJob:    job.BaseJob,
		StartTime:  job.StartTime.Unix(),
		Statistics: make(map[string]schema.JobStatistics),
	}

	for metric, data := range jobData {
		nodeData, ok := data["node"]
		if !ok {
			// This should never happen ?
			continue
		}

		// Sum the per-series averages and track the overall min/max.
		avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
		for _, series := range nodeData.Series {
			avg += series.Statistics.Avg
			min = math.Min(min, series.Statistics.Min)
			max = math.Max(max, series.Statistics.Max)
		}

		// Look the metric configuration up once instead of once per unit field.
		unit := archive.GetMetricConfig(job.Cluster, metric).Unit

		// Round AVG Result to 2 Digits
		// NOTE(review): the sum is divided by job.NumNodes, not by the number
		// of series — presumably one node scope series per node; confirm.
		jobMeta.Statistics[metric] = schema.JobStatistics{
			Unit: schema.Unit{
				Prefix: unit.Prefix,
				Base:   unit.Base,
			},
			Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
			Min: min,
			Max: max,
		}
	}

	// If the file based archive is disabled,
	// only return the JobMeta structure as the
	// statistics in there are needed.
	if config.Keys.DisableArchive {
		return jobMeta, nil
	}

	return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -10,19 +10,12 @@ import (
"database/sql"
"encoding/base64"
"errors"
"fmt"
"net"
"net/http"
"os"
"strings"
"sync"
"time"
"golang.org/x/time/rate"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/gorilla/sessions"
@@ -33,37 +26,19 @@ type Authenticator interface {
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
}
var (
initOnce sync.Once
authInstance *Authentication
)
var ipUserLimiters sync.Map
// getIPUserLimiter returns the rate limiter shared by all login attempts for
// the given IP address / username combination, creating it on first use.
// Each limiter allows a burst of 10 attempts and refills one attempt every
// six minutes (time.Hour/10).
//
// LoadOrStore is used for the insert so that concurrent first requests for
// the same key all end up with the same limiter. The previous separate
// Load + Store let each racing request install and use its own limiter.
//
// NOTE(review): entries are never removed from ipUserLimiters in this
// function; whether something else prunes the map is not visible here.
func getIPUserLimiter(ip, username string) *rate.Limiter {
	key := ip + ":" + username

	// Fast path: avoid allocating a new limiter when one already exists.
	if limiter, ok := ipUserLimiters.Load(key); ok {
		return limiter.(*rate.Limiter)
	}

	limiter, _ := ipUserLimiters.LoadOrStore(key, rate.NewLimiter(rate.Every(time.Hour/10), 10))
	return limiter.(*rate.Limiter)
}
type Authentication struct {
sessionStore *sessions.CookieStore
sessionStore *sessions.CookieStore
SessionMaxAge time.Duration
authenticators []Authenticator
LdapAuth *LdapAuthenticator
JwtAuth *JWTAuthenticator
LocalAuth *LocalAuthenticator
authenticators []Authenticator
SessionMaxAge time.Duration
}
func (auth *Authentication) AuthViaSession(
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, error) {
r *http.Request) (*schema.User, error) {
session, err := auth.sessionStore.Get(r, "session")
if err != nil {
log.Error("Error while getting session store")
@@ -87,161 +62,81 @@ func (auth *Authentication) AuthViaSession(
}, nil
}
func Init() {
initOnce.Do(func() {
authInstance = &Authentication{}
func Init() (*Authentication, error) {
auth := &Authentication{}
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Error("Error while initializing authentication -> decoding session key failed")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
}
if config.Keys.LdapConfig != nil {
ldapAuth := &LdapAuthenticator{}
if err := ldapAuth.Init(); err != nil {
log.Warn("Error while initializing authentication -> ldapAuth init failed")
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Fatal("Error while initializing authentication -> decoding session key failed")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
auth.LdapAuth = ldapAuth
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
}
} else {
log.Info("Missing LDAP configuration: No LDAP support!")
}
if config.Keys.JwtConfig != nil {
auth.JwtAuth = &JWTAuthenticator{}
if err := auth.JwtAuth.Init(); err != nil {
log.Error("Error while initializing authentication -> jwtAuth init failed")
return nil, err
}
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err == nil {
authInstance.SessionMaxAge = d
}
if config.Keys.LdapConfig != nil {
ldapAuth := &LdapAuthenticator{}
if err := ldapAuth.Init(); err != nil {
log.Warn("Error while initializing authentication -> ldapAuth init failed")
} else {
authInstance.LdapAuth = ldapAuth
authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
}
jwtSessionAuth := &JWTSessionAuthenticator{}
if err := jwtSessionAuth.Init(); err != nil {
log.Info("jwtSessionAuth init failed: No JWT login support!")
} else {
log.Info("Missing LDAP configuration: No LDAP support!")
auth.authenticators = append(auth.authenticators, jwtSessionAuth)
}
if config.Keys.JwtConfig != nil {
authInstance.JwtAuth = &JWTAuthenticator{}
if err := authInstance.JwtAuth.Init(); err != nil {
log.Fatal("Error while initializing authentication -> jwtAuth init failed")
}
jwtSessionAuth := &JWTSessionAuthenticator{}
if err := jwtSessionAuth.Init(); err != nil {
log.Info("jwtSessionAuth init failed: No JWT login support!")
} else {
authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
}
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
if err := jwtCookieSessionAuth.Init(); err != nil {
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
} else {
authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
}
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
if err := jwtCookieSessionAuth.Init(); err != nil {
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
} else {
log.Info("Missing JWT configuration: No JWT token support!")
auth.authenticators = append(auth.authenticators, jwtCookieSessionAuth)
}
authInstance.LocalAuth = &LocalAuthenticator{}
if err := authInstance.LocalAuth.Init(); err != nil {
log.Fatal("Error while initializing authentication -> localAuth init failed")
}
authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
})
}
func GetAuthInstance() *Authentication {
if authInstance == nil {
log.Fatal("Authentication module not initialized!")
} else {
log.Info("Missing JWT configuration: No JWT token support!")
}
return authInstance
}
func handleTokenUser(tokenUser *schema.User) {
r := repository.GetUserRepository()
dbUser, err := r.GetUser(tokenUser.Username)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
} else if err == sql.ErrNoRows && config.Keys.JwtConfig.SyncUserOnLogin { // Adds New User
if err := r.AddUser(tokenUser); err != nil {
log.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
}
} else if err == nil && config.Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
if err := r.UpdateUser(dbUser, tokenUser); err != nil {
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
}
auth.LocalAuth = &LocalAuthenticator{}
if err := auth.LocalAuth.Init(); err != nil {
log.Error("Error while initializing authentication -> localAuth init failed")
return nil, err
}
}
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
// handleOIDCUser reconciles a user obtained via OpenID Connect with the user
// database. Depending on the OpenID configuration flags it adds a user that
// is not yet known (SyncUserOnLogin) or updates an existing one
// (UpdateUserOnLogin). All errors are logged and otherwise ignored.
func handleOIDCUser(OIDCUser *schema.User) {
	userRepo := repository.GetUserRepository()
	existing, lookupErr := userRepo.GetUser(OIDCUser.Username)

	switch {
	case lookupErr != nil && lookupErr != sql.ErrNoRows:
		// Lookup itself failed
		log.Errorf("Error while loading user '%s': %v", OIDCUser.Username, lookupErr)

	case lookupErr == sql.ErrNoRows && config.Keys.OpenIDConfig.SyncUserOnLogin:
		// Adds New User
		if addErr := userRepo.AddUser(OIDCUser); addErr != nil {
			log.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, addErr)
		}

	case lookupErr == nil && config.Keys.OpenIDConfig.UpdateUserOnLogin:
		// Update Existing User
		if updErr := userRepo.UpdateUser(existing, OIDCUser); updErr != nil {
			log.Errorf("Error while updating user '%s' to DB: %v", existing.Username, updErr)
		}
	}
}
func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error {
session, err := auth.sessionStore.New(r, "session")
if err != nil {
log.Errorf("session creation failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return err
}
if auth.SessionMaxAge != 0 {
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
}
if config.Keys.HttpsCertFile == "" && config.Keys.HttpsKeyFile == "" {
session.Options.Secure = false
}
session.Options.SameSite = http.SameSiteStrictMode
session.Values["username"] = user.Username
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
if err := auth.sessionStore.Save(r, rw, session); err != nil {
log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return err
}
return nil
return auth, nil
}
func (auth *Authentication) Login(
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
) http.Handler {
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error)) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
ip, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
ip = r.RemoteAddr
}
username := r.FormValue("username")
limiter := getIPUserLimiter(ip, username)
if !limiter.Allow() {
log.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
onfailure(rw, r, errors.New("Too many login attempts, try again in a few minutes."))
return
}
var dbUser *schema.User
if username != "" {
var err error
dbUser, err = repository.GetUserRepository().GetUser(username)
@@ -266,19 +161,28 @@ func (auth *Authentication) Login(
return
}
if err := auth.SaveSession(rw, r, user); err != nil {
session, err := auth.sessionStore.New(r, "session")
if err != nil {
log.Errorf("session creation failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
if auth.SessionMaxAge != 0 {
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
}
session.Values["username"] = user.Username
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
if err := auth.sessionStore.Save(r, rw, session); err != nil {
log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
if r.FormValue("redirect") != "" {
http.RedirectHandler(r.FormValue("redirect"), http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
return
}
http.RedirectHandler("/", http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
@@ -289,155 +193,37 @@ func (auth *Authentication) Login(
func (auth *Authentication) Auth(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error)) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
log.Infof("auth -> authentication failed: %s", err.Error())
log.Infof("authentication failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnauthorized)
return
}
if user == nil {
user, err = auth.AuthViaSession(rw, r)
if err != nil {
log.Infof("auth -> authentication failed: %s", err.Error())
log.Infof("authentication failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnauthorized)
return
}
}
if user != nil {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
log.Info("auth -> authentication failed")
log.Debug("authentication failed")
onfailure(rw, r, errors.New("unauthorized (please login first)"))
})
}
// AuthApi returns a middleware guarding the REST API. A request passes when
//   - auth.JwtAuth.AuthViaJWT yields a user without error,
//   - securedCheck accepts the user and request, and
//   - the user either has exactly one role and it is the api role, or has
//     two or more roles including both admin and api.
//
// On success the user is stored in the request context under
// repository.ContextUserKey and onsuccess is invoked; otherwise onfailure is
// invoked exactly once.
func (auth *Authentication) AuthApi(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		user, err := auth.JwtAuth.AuthViaJWT(rw, r)
		if err != nil {
			log.Infof("auth api -> authentication failed: %s", err.Error())
			onfailure(rw, r, err)
			return
		}

		ipErr := securedCheck(user, r)
		if ipErr != nil {
			log.Infof("auth api -> secured check failed: %s", ipErr.Error())
			onfailure(rw, r, ipErr)
			return
		}

		if user != nil {
			switch {
			case len(user.Roles) == 1:
				if user.HasRole(schema.RoleApi) {
					ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
					onsuccess.ServeHTTP(rw, r.WithContext(ctx))
					return
				}
			case len(user.Roles) >= 2:
				if user.HasAllRoles([]schema.Role{schema.RoleAdmin, schema.RoleApi}) {
					ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
					onsuccess.ServeHTTP(rw, r.WithContext(ctx))
					return
				}
			default:
				log.Info("auth api -> authentication failed: missing role")
				onfailure(rw, r, errors.New("unauthorized"))
				// Return here: falling through would invoke onfailure a
				// second time on the same response.
				return
			}
		}

		log.Info("auth api -> authentication failed: no auth")
		onfailure(rw, r, errors.New("unauthorized"))
	})
}
// AuthUserApi returns a middleware guarding the user facing REST API. A
// request passes when auth.JwtAuth.AuthViaJWT yields a user without error and
// the user either has exactly one role and it is the api role, or has two or
// more roles including api plus at least one of user, manager or admin.
//
// On success the user is stored in the request context under
// repository.ContextUserKey and onsuccess is invoked; otherwise onfailure is
// invoked exactly once.
func (auth *Authentication) AuthUserApi(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		user, err := auth.JwtAuth.AuthViaJWT(rw, r)
		if err != nil {
			log.Infof("auth user api -> authentication failed: %s", err.Error())
			onfailure(rw, r, err)
			return
		}

		if user != nil {
			switch {
			case len(user.Roles) == 1:
				if user.HasRole(schema.RoleApi) {
					ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
					onsuccess.ServeHTTP(rw, r.WithContext(ctx))
					return
				}
			case len(user.Roles) >= 2:
				if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
					ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
					onsuccess.ServeHTTP(rw, r.WithContext(ctx))
					return
				}
			default:
				log.Info("auth user api -> authentication failed: missing role")
				onfailure(rw, r, errors.New("unauthorized"))
				// Return here: falling through would invoke onfailure a
				// second time on the same response.
				return
			}
		}

		log.Info("auth user api -> authentication failed: no auth")
		onfailure(rw, r, errors.New("unauthorized"))
	})
}
// AuthConfigApi returns a middleware guarding the configuration API. Only
// requests with a valid session (auth.AuthViaSession) whose user holds the
// admin role are passed on to onsuccess, with the user stored in the request
// context under repository.ContextUserKey. Everything else goes to onfailure.
func (auth *Authentication) AuthConfigApi(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
	guard := func(rw http.ResponseWriter, r *http.Request) {
		sessionUser, sessionErr := auth.AuthViaSession(rw, r)
		if sessionErr != nil {
			log.Infof("auth config api -> authentication failed: %s", sessionErr.Error())
			onfailure(rw, r, sessionErr)
			return
		}

		// Reject missing sessions and non-admin users alike.
		if sessionUser == nil || !sessionUser.HasRole(schema.RoleAdmin) {
			log.Info("auth config api -> authentication failed: no auth")
			onfailure(rw, r, errors.New("unauthorized"))
			return
		}

		withUser := context.WithValue(r.Context(), repository.ContextUserKey, sessionUser)
		onsuccess.ServeHTTP(rw, r.WithContext(withUser))
	}

	return http.HandlerFunc(guard)
}
// AuthFrontendApi returns a middleware guarding the frontend API. Any request
// with a valid session (auth.AuthViaSession) is passed on to onsuccess, with
// the user stored in the request context under repository.ContextUserKey.
// Requests without a session user, or with a session error, go to onfailure.
func (auth *Authentication) AuthFrontendApi(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
	guard := func(rw http.ResponseWriter, r *http.Request) {
		sessionUser, sessionErr := auth.AuthViaSession(rw, r)
		if sessionErr != nil {
			log.Infof("auth frontend api -> authentication failed: %s", sessionErr.Error())
			onfailure(rw, r, sessionErr)
			return
		}

		// No error but also no user: there is no session to authenticate with.
		if sessionUser == nil {
			log.Info("auth frontend api -> authentication failed: no auth")
			onfailure(rw, r, errors.New("unauthorized"))
			return
		}

		withUser := context.WithValue(r.Context(), repository.ContextUserKey, sessionUser)
		onsuccess.ServeHTTP(rw, r.WithContext(withUser))
	}

	return http.HandlerFunc(guard)
}
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
session, err := auth.sessionStore.Get(r, "session")
@@ -457,38 +243,3 @@ func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
onsuccess.ServeHTTP(rw, r)
})
}
// securedCheck verifies that a request to a secured API endpoint comes from
// an allowed IP address. Helper moved to the middleware auth handlers.
//
// The client address is taken from the X-Real-Ip header, then the
// X-Forwarded-For header, then r.RemoteAddr, in that order. It is compared
// against config.Keys.ApiAllowedIPs:
//   - an empty list denies every request,
//   - "*" as the first entry allows every request,
//   - otherwise the address must be contained in the list.
//
// Returns nil when the request is allowed and a descriptive error otherwise,
// including when user is nil.
//
// NOTE(review): both headers are client controlled unless a trusted reverse
// proxy overwrites them, and X-Forwarded-For may hold a comma separated list;
// neither case is handled here — confirm the deployment assumptions.
func securedCheck(user *schema.User, r *http.Request) error {
	if user == nil {
		return fmt.Errorf("no user for secured check")
	}

	// extract IP address for checking
	IPAddress := r.Header.Get("X-Real-Ip")
	if IPAddress == "" {
		IPAddress = r.Header.Get("X-Forwarded-For")
	}
	if IPAddress == "" {
		IPAddress = r.RemoteAddr
	}

	// Strip a trailing port. net.SplitHostPort understands "host:port" as
	// well as "[ipv6]:port"; a bare IPv6 address such as "::1" makes it fail
	// and is kept unchanged. Splitting on the first ':' truncated every IPv6
	// address.
	if strings.Contains(IPAddress, ":") {
		if host, _, err := net.SplitHostPort(IPAddress); err == nil {
			IPAddress = host
		}
	}

	// If nothing declared in config: deny all request to this api endpoint
	if len(config.Keys.ApiAllowedIPs) == 0 {
		return fmt.Errorf("missing configuration key ApiAllowedIPs")
	}
	// If wildcard declared in config: Continue
	if config.Keys.ApiAllowedIPs[0] == "*" {
		return nil
	}
	// check if IP is allowed
	if !util.Contains(config.Keys.ApiAllowedIPs, IPAddress) {
		return fmt.Errorf("unknown ip: %v", IPAddress)
	}

	return nil
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -17,7 +17,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/golang-jwt/jwt/v5"
"github.com/golang-jwt/jwt/v4"
)
type JWTAuthenticator struct {
@@ -49,8 +49,8 @@ func (ja *JWTAuthenticator) Init() error {
func (ja *JWTAuthenticator) AuthViaJWT(
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, error) {
r *http.Request) (*schema.User, error) {
rawtoken := r.Header.Get("X-Auth-Token")
if rawtoken == "" {
rawtoken = r.Header.Get("Authorization")
@@ -73,9 +73,9 @@ func (ja *JWTAuthenticator) AuthViaJWT(
log.Warn("Error while parsing JWT token")
return nil, err
}
if !token.Valid {
if err := token.Claims.Valid(); err != nil {
log.Warn("jwt token claims are not valid")
return nil, errors.New("jwt token claims are not valid")
return nil, err
}
// Token is valid, extract payload
@@ -88,6 +88,7 @@ func (ja *JWTAuthenticator) AuthViaJWT(
if config.Keys.JwtConfig.ValidateUser {
ur := repository.GetUserRepository()
user, err := ur.GetUser(sub)
// Deny any logins for unknown usernames
if err != nil {
log.Warn("Could not find user from JWT in internal database.")
@@ -116,6 +117,7 @@ func (ja *JWTAuthenticator) AuthViaJWT(
// Generate a new JWT that can be used for authentication
func (ja *JWTAuthenticator) ProvideJWT(user *schema.User) (string, error) {
if ja.privateKey == nil {
return "", errors.New("environment variable 'JWT_PRIVATE_KEY' not set")
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -17,7 +17,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/golang-jwt/jwt/v5"
"github.com/golang-jwt/jwt/v4"
)
type JWTCookieSessionAuthenticator struct {
@@ -90,8 +90,8 @@ func (ja *JWTCookieSessionAuthenticator) CanLogin(
user *schema.User,
username string,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, bool) {
r *http.Request) (*schema.User, bool) {
jc := config.Keys.JwtConfig
cookieName := ""
if jc.CookieName != "" {
@@ -113,8 +113,8 @@ func (ja *JWTCookieSessionAuthenticator) CanLogin(
func (ja *JWTCookieSessionAuthenticator) Login(
user *schema.User,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, error) {
r *http.Request) (*schema.User, error) {
jc := config.Keys.JwtConfig
jwtCookie, err := r.Cookie(jc.CookieName)
var rawtoken string
@@ -144,9 +144,10 @@ func (ja *JWTCookieSessionAuthenticator) Login(
return nil, err
}
if !token.Valid {
// Check token validity and extract paypload
if err := token.Claims.Valid(); err != nil {
log.Warn("jwt token claims are not valid")
return nil, errors.New("jwt token claims are not valid")
return nil, err
}
claims := token.Claims.(jwt.MapClaims)
@@ -198,8 +199,10 @@ func (ja *JWTCookieSessionAuthenticator) Login(
AuthSource: schema.AuthViaToken,
}
if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
handleTokenUser(user)
if jc.SyncUserOnLogin {
if err := repository.GetUserRepository().AddUser(user); err != nil {
log.Errorf("Error while adding user '%s' to DB", user.Username)
}
}
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -17,7 +17,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/golang-jwt/jwt/v5"
"github.com/golang-jwt/jwt/v4"
)
type JWTSessionAuthenticator struct {
@@ -44,8 +44,8 @@ func (ja *JWTSessionAuthenticator) CanLogin(
user *schema.User,
username string,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, bool) {
r *http.Request) (*schema.User, bool) {
return user, r.Header.Get("Authorization") != "" ||
r.URL.Query().Get("login-token") != ""
}
@@ -53,8 +53,8 @@ func (ja *JWTSessionAuthenticator) CanLogin(
func (ja *JWTSessionAuthenticator) Login(
user *schema.User,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, error) {
r *http.Request) (*schema.User, error) {
rawtoken := strings.TrimPrefix(r.Header.Get("Authorization"), "Bearer ")
if rawtoken == "" {
rawtoken = r.URL.Query().Get("login-token")
@@ -71,9 +71,9 @@ func (ja *JWTSessionAuthenticator) Login(
return nil, err
}
if !token.Valid {
if err = token.Claims.Valid(); err != nil {
log.Warn("jwt token claims are not valid")
return nil, errors.New("jwt token claims are not valid")
return nil, err
}
claims := token.Claims.(jwt.MapClaims)
@@ -138,8 +138,10 @@ func (ja *JWTSessionAuthenticator) Login(
AuthSource: schema.AuthViaToken,
}
if config.Keys.JwtConfig.SyncUserOnLogin || config.Keys.JwtConfig.UpdateUserOnLogin {
handleTokenUser(user)
if config.Keys.JwtConfig.SyncUserOnLogin {
if err := repository.GetUserRepository().AddUser(user); err != nil {
log.Errorf("Error while adding user '%s' to DB", user.Username)
}
}
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -10,6 +10,7 @@ import (
"net/http"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
@@ -20,7 +21,7 @@ import (
type LdapAuthenticator struct {
syncPassword string
UserAttr string
UserAttr string
}
var _ Authenticator = (*LdapAuthenticator)(nil)
@@ -33,6 +34,33 @@ func (la *LdapAuthenticator) Init() error {
lc := config.Keys.LdapConfig
if lc.SyncInterval != "" {
interval, err := time.ParseDuration(lc.SyncInterval)
if err != nil {
log.Warnf("Could not parse duration for sync interval: %v",
lc.SyncInterval)
return err
}
if interval == 0 {
log.Info("Sync interval is zero")
return nil
}
go func() {
ticker := time.NewTicker(interval)
for t := range ticker.C {
log.Printf("sync started at %s", t.Format(time.RFC3339))
if err := la.Sync(); err != nil {
log.Errorf("sync failed: %s", err.Error())
}
log.Print("sync done")
}
}()
} else {
log.Info("LDAP configuration key sync_interval invalid")
}
if lc.UserAttr != "" {
la.UserAttr = lc.UserAttr
} else {
@@ -46,8 +74,8 @@ func (la *LdapAuthenticator) CanLogin(
user *schema.User,
username string,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, bool) {
r *http.Request) (*schema.User, bool) {
lc := config.Keys.LdapConfig
if user != nil {
@@ -110,8 +138,8 @@ func (la *LdapAuthenticator) CanLogin(
func (la *LdapAuthenticator) Login(
user *schema.User,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, error) {
r *http.Request) (*schema.User, error) {
l, err := la.getLdapConnection(false)
if err != nil {
log.Warn("Error while getting ldap connection")
@@ -210,6 +238,7 @@ func (la *LdapAuthenticator) Sync() error {
}
func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
lc := config.Keys.LdapConfig
conn, err := ldap.DialURL(lc.Url)
if err != nil {

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

View File

@@ -1,196 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package auth
import (
"context"
"crypto/rand"
"encoding/base64"
"io"
"net/http"
"os"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/coreos/go-oidc/v3/oidc"
"github.com/gorilla/mux"
"golang.org/x/oauth2"
)
// OIDC bundles the state used by the OpenID Connect login handlers.
type OIDC struct {
// OAuth2 client configuration used for the authorization code exchange
client *oauth2.Config
// OpenID Connect provider, queried for endpoints and user info
provider *oidc.Provider
// Authentication instance used to persist the session after login
authentication *Authentication
// Client ID as read from the OID_CLIENT_ID environment variable
clientID string
}
// randString returns nByte cryptographically random bytes encoded as an
// unpadded, URL-safe base64 string. An error is returned when the random
// source cannot deliver enough bytes.
func randString(nByte int) (string, error) {
	buf := make([]byte, nByte)

	_, err := io.ReadFull(rand.Reader, buf)
	if err != nil {
		return "", err
	}

	encoded := base64.RawURLEncoding.EncodeToString(buf)
	return encoded, nil
}
func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value string) {
c := &http.Cookie{
Name: name,
Value: value,
MaxAge: int(time.Hour.Seconds()),
Secure: r.TLS != nil,
HttpOnly: true,
}
http.SetCookie(w, c)
}
// NewOIDC creates the OpenID Connect helper for the provider configured in
// config.Keys.OpenIDConfig.Provider. Client ID and secret are read from the
// environment variables OID_CLIENT_ID and OID_CLIENT_SECRET; a warning is
// logged for each one that is missing. A failure to set up the provider is
// reported via log.Fatal.
func NewOIDC(a *Authentication) *OIDC {
	oidcProvider, err := oidc.NewProvider(context.Background(), config.Keys.OpenIDConfig.Provider)
	if err != nil {
		log.Fatal(err)
	}

	id := os.Getenv("OID_CLIENT_ID")
	if id == "" {
		log.Warn("environment variable 'OID_CLIENT_ID' not set (Open ID connect auth will not work)")
	}

	secret := os.Getenv("OID_CLIENT_SECRET")
	if secret == "" {
		log.Warn("environment variable 'OID_CLIENT_SECRET' not set (Open ID connect auth will not work)")
	}

	return &OIDC{
		provider:       oidcProvider,
		clientID:       id,
		authentication: a,
		client: &oauth2.Config{
			ClientID:     id,
			ClientSecret: secret,
			Endpoint:     oidcProvider.Endpoint(),
			RedirectURL:  "oidc-callback",
			Scopes:       []string{oidc.ScopeOpenID, "profile", "email"},
		},
	}
}
// RegisterEndpoints attaches the two OpenID Connect routes to the router:
// /oidc-login is handled by OAuth2Login and /oidc-callback by OAuth2Callback.
func (oa *OIDC) RegisterEndpoints(r *mux.Router) {
r.HandleFunc("/oidc-login", oa.OAuth2Login)
r.HandleFunc("/oidc-callback", oa.OAuth2Callback)
}
// OAuth2Callback handles the redirect back from the OpenID Connect provider.
//
// It checks the "state" form value against the "state" cookie, exchanges the
// authorization code (with the PKCE verifier from the "verifier" cookie) for
// a token, fetches the user info, maps the provider roles onto local roles,
// optionally syncs the user into the database, saves the session and finally
// redirects to "/".
//
// Every failure ends the request with an error response; no session is saved
// and no redirect is sent in that case.
func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
	c, err := r.Cookie("state")
	if err != nil {
		http.Error(rw, "state cookie not found", http.StatusBadRequest)
		return
	}
	state := c.Value

	c, err = r.Cookie("verifier")
	if err != nil {
		http.Error(rw, "verifier cookie not found", http.StatusBadRequest)
		return
	}
	codeVerifier := c.Value

	if err := r.ParseForm(); err != nil {
		http.Error(rw, "Failed to parse form: "+err.Error(), http.StatusBadRequest)
		return
	}
	// The state echoed by the provider must match the one stored in the cookie.
	if r.Form.Get("state") != state {
		http.Error(rw, "State invalid", http.StatusBadRequest)
		return
	}
	code := r.Form.Get("code")
	if code == "" {
		http.Error(rw, "Code not found", http.StatusBadRequest)
		return
	}

	token, err := oa.client.Exchange(context.Background(), code, oauth2.VerifierOption(codeVerifier))
	if err != nil {
		http.Error(rw, "Failed to exchange token: "+err.Error(), http.StatusInternalServerError)
		return
	}

	userInfo, err := oa.provider.UserInfo(context.Background(), oauth2.StaticTokenSource(token))
	if err != nil {
		http.Error(rw, "Failed to get userinfo: "+err.Error(), http.StatusInternalServerError)
		return
	}

	// // Extract the ID Token from OAuth2 token.
	// rawIDToken, ok := token.Extra("id_token").(string)
	// if !ok {
	// 	http.Error(rw, "Cannot access idToken", http.StatusInternalServerError)
	// }
	//
	// verifier := oa.provider.Verifier(&oidc.Config{ClientID: oa.clientID})
	// // Parse and verify ID Token payload.
	// idToken, err := verifier.Verify(context.Background(), rawIDToken)
	// if err != nil {
	// 	http.Error(rw, "Failed to extract idToken: "+err.Error(), http.StatusInternalServerError)
	// }

	projects := make([]string, 0)

	// Extract custom claims
	var claims struct {
		Username string `json:"preferred_username"`
		Name     string `json:"name"`
		Profile  struct {
			Client struct {
				Roles []string `json:"roles"`
			} `json:"clustercockpit"`
		} `json:"resource_access"`
	}
	if err := userInfo.Claims(&claims); err != nil {
		http.Error(rw, "Failed to extract Claims: "+err.Error(), http.StatusInternalServerError)
		// Stop here: without claims there is no user to log in.
		return
	}

	// Map provider roles onto local roles; unknown roles are ignored.
	var roles []string
	for _, role := range claims.Profile.Client.Roles {
		switch role {
		case "user":
			roles = append(roles, schema.GetRoleString(schema.RoleUser))
		case "admin":
			roles = append(roles, schema.GetRoleString(schema.RoleAdmin))
		}
	}

	// Fall back to the plain user role when no known role was provided.
	if len(roles) == 0 {
		roles = append(roles, schema.GetRoleString(schema.RoleUser))
	}

	user := &schema.User{
		Username:   claims.Username,
		Name:       claims.Name,
		Roles:      roles,
		Projects:   projects,
		AuthSource: schema.AuthViaOIDC,
	}

	if config.Keys.OpenIDConfig.SyncUserOnLogin || config.Keys.OpenIDConfig.UpdateUserOnLogin {
		handleOIDCUser(user)
	}

	// SaveSession already logs and writes the error response on failure, so
	// only the redirect has to be skipped here.
	if err := oa.authentication.SaveSession(rw, r, user); err != nil {
		return
	}
	log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
	ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
	http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(ctx))
}
// OAuth2Login starts the authorization-code flow.
//
// It stores a random "state" value and a PKCE code verifier in callback
// cookies and then redirects the browser to the provider's authorization
// URL. OAuth2Callback reads both cookies back.
func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) {
	// The state value is compared in the callback to reject forged redirects.
	state, err := randString(16)
	if err != nil {
		http.Error(rw, "Internal error", http.StatusInternalServerError)
		return
	}
	setCallbackCookie(rw, r, "state", state)

	// PKCE: the verifier stays in a cookie, only its S256 challenge is sent
	// to the provider.
	verifier := oauth2.GenerateVerifier()
	setCallbackCookie(rw, r, "verifier", verifier)

	// Send the user to the provider's consent page.
	opts := []oauth2.AuthCodeOption{
		oauth2.AccessTypeOffline,
		oauth2.S256ChallengeOption(verifier),
	}
	http.Redirect(rw, r, oa.client.AuthCodeURL(state, opts...), http.StatusFound)
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -7,9 +7,9 @@ package config
import (
"bytes"
"encoding/json"
"log"
"os"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@@ -29,9 +29,8 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_showFootprint": true,
"job_list_usePaging": false,
"plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3,
@@ -53,20 +52,20 @@ func Init(flagConfigFile string) {
raw, err := os.ReadFile(flagConfigFile)
if err != nil {
if !os.IsNotExist(err) {
log.Abortf("Config Init: Could not read config file '%s'.\nError: %s\n", flagConfigFile, err.Error())
log.Fatalf("CONFIG ERROR: %v", err)
}
} else {
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
log.Abortf("Config Init: Could not validate config file '%s'.\nError: %s\n", flagConfigFile, err.Error())
log.Fatalf("Validate config: %v\n", err)
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
log.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", flagConfigFile, err.Error())
log.Fatalf("could not decode: %v", err)
}
if Keys.Clusters == nil || len(Keys.Clusters) < 1 {
log.Abort("Config Init: At least one cluster required in config. Exited with error.")
log.Fatal("At least one cluster required in config!")
}
}
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

View File

@@ -1,44 +0,0 @@
package config
import (
"encoding/json"
"os"
"strings"
)
// DefaultMetricsCluster is one entry of the "clusters" list in the default
// metrics file.
type DefaultMetricsCluster struct {
	// Name is read from the JSON key "name".
	Name string `json:"name"`
	// DefaultMetrics is read from the JSON key "default_metrics".
	// NOTE(review): presumably a comma-separated list meant for
	// ParseMetricsString — confirm against callers.
	DefaultMetrics string `json:"default_metrics"`
}
// DefaultMetricsConfig is the top-level structure decoded by
// LoadDefaultMetricsConfig.
type DefaultMetricsConfig struct {
	// Clusters is read from the JSON key "clusters".
	Clusters []DefaultMetricsCluster `json:"clusters"`
}
// LoadDefaultMetricsConfig reads and decodes "default_metrics.json" from the
// current working directory.
//
// Returns (nil, nil) when the file does not exist, (nil, err) when it cannot
// be read or is not valid JSON for DefaultMetricsConfig, and the decoded
// configuration otherwise.
func LoadDefaultMetricsConfig() (*DefaultMetricsConfig, error) {
	const filePath = "default_metrics.json"

	// Read directly instead of Stat-then-Read: one syscall fewer, and a file
	// removed between the two calls can no longer turn "absent" into an error.
	data, err := os.ReadFile(filePath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil
		}
		return nil, err
	}

	var cfg DefaultMetricsConfig
	if err := json.Unmarshal(data, &cfg); err != nil {
		return nil, err
	}
	return &cfg, nil
}
// ParseMetricsString splits s on commas, trims surrounding whitespace from
// every piece and drops pieces that end up empty. The result is nil when no
// non-empty piece remains.
func ParseMetricsString(s string) []string {
	var metrics []string
	for _, field := range strings.Split(s, ",") {
		name := strings.TrimSpace(field)
		if len(name) == 0 {
			continue
		}
		metrics = append(metrics, name)
	}
	return metrics
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

View File

@@ -16,23 +16,11 @@ type Count struct {
Count int `json:"count"`
}
type EnergyFootprintValue struct {
Hardware string `json:"hardware"`
Metric string `json:"metric"`
Value float64 `json:"value"`
}
type FloatRange struct {
From float64 `json:"from"`
To float64 `json:"to"`
}
type FootprintValue struct {
Name string `json:"name"`
Stat string `json:"stat"`
Value float64 `json:"value"`
}
type Footprints struct {
TimeWeights *TimeWeights `json:"timeWeights"`
Metrics []*MetricFootprints `json:"metrics"`
@@ -58,14 +46,16 @@ type JobFilter struct {
Cluster *StringInput `json:"cluster,omitempty"`
Partition *StringInput `json:"partition,omitempty"`
Duration *schema.IntRange `json:"duration,omitempty"`
Energy *FloatRange `json:"energy,omitempty"`
MinRunningFor *int `json:"minRunningFor,omitempty"`
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
StartTime *schema.TimeRange `json:"startTime,omitempty"`
State []schema.JobState `json:"state,omitempty"`
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg,omitempty"`
MemBwAvg *FloatRange `json:"memBwAvg,omitempty"`
LoadAvg *FloatRange `json:"loadAvg,omitempty"`
MemUsedMax *FloatRange `json:"memUsedMax,omitempty"`
Exclusive *int `json:"exclusive,omitempty"`
Node *StringInput `json:"node,omitempty"`
}
@@ -88,22 +78,10 @@ type JobMetricWithName struct {
}
type JobResultList struct {
Items []*schema.Job `json:"items"`
Offset *int `json:"offset,omitempty"`
Limit *int `json:"limit,omitempty"`
Count *int `json:"count,omitempty"`
HasNextPage *bool `json:"hasNextPage,omitempty"`
}
type JobStats struct {
Name string `json:"name"`
Stats *schema.MetricStatistics `json:"stats"`
}
type JobStatsWithScope struct {
Name string `json:"name"`
Scope schema.MetricScope `json:"scope"`
Stats []*ScopedStats `json:"stats"`
Items []*schema.Job `json:"items"`
Offset *int `json:"offset,omitempty"`
Limit *int `json:"limit,omitempty"`
Count *int `json:"count,omitempty"`
}
type JobsStatistics struct {
@@ -141,36 +119,17 @@ type MetricHistoPoint struct {
type MetricHistoPoints struct {
Metric string `json:"metric"`
Unit string `json:"unit"`
Stat *string `json:"stat,omitempty"`
Data []*MetricHistoPoint `json:"data,omitempty"`
}
type MetricStatItem struct {
MetricName string `json:"metricName"`
Range *FloatRange `json:"range"`
}
type Mutation struct {
}
type NodeMetrics struct {
Host string `json:"host"`
SubCluster string `json:"subCluster"`
Metrics []*JobMetricWithName `json:"metrics"`
}
type NodesResultList struct {
Items []*NodeMetrics `json:"items"`
Offset *int `json:"offset,omitempty"`
Limit *int `json:"limit,omitempty"`
Count *int `json:"count,omitempty"`
TotalNodes *int `json:"totalNodes,omitempty"`
HasNextPage *bool `json:"hasNextPage,omitempty"`
}
type OrderByInput struct {
Field string `json:"field"`
Type string `json:"type"`
Order SortDirectionEnum `json:"order"`
}
@@ -179,12 +138,6 @@ type PageRequest struct {
Page int `json:"page"`
}
type ScopedStats struct {
Hostname string `json:"hostname"`
ID *string `json:"id,omitempty"`
Data *schema.MetricStatistics `json:"data"`
}
type StringInput struct {
Eq *string `json:"eq,omitempty"`
Neq *string `json:"neq,omitempty"`
@@ -195,9 +148,8 @@ type StringInput struct {
}
type TimeRangeOutput struct {
Range *string `json:"range,omitempty"`
From time.Time `json:"from"`
To time.Time `json:"to"`
From time.Time `json:"from"`
To time.Time `json:"to"`
}
type TimeWeights struct {
@@ -238,7 +190,7 @@ func (e Aggregate) String() string {
return string(e)
}
func (e *Aggregate) UnmarshalGQL(v any) error {
func (e *Aggregate) UnmarshalGQL(v interface{}) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
@@ -291,7 +243,7 @@ func (e SortByAggregate) String() string {
return string(e)
}
func (e *SortByAggregate) UnmarshalGQL(v any) error {
func (e *SortByAggregate) UnmarshalGQL(v interface{}) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
@@ -332,7 +284,7 @@ func (e SortDirectionEnum) String() string {
return string(e)
}
func (e *SortDirectionEnum) UnmarshalGQL(v any) error {
func (e *SortDirectionEnum) UnmarshalGQL(v interface{}) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")

View File

@@ -1,39 +1,15 @@
package graph
import (
"sync"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/jmoiron/sqlx"
)
// This file will not be regenerated automatically.
//
// It serves as dependency injection for your app, add any dependencies you require here.
var (
initOnce sync.Once
resolverInstance *Resolver
)
// Resolver carries the dependencies shared by the GraphQL resolvers in this
// package.
type Resolver struct {
	// DB is the sqlx database handle.
	DB *sqlx.DB
	// Repo is the job repository the resolvers call through r.Repo.
	Repo *repository.JobRepository
}
func Init() {
initOnce.Do(func() {
db := repository.GetConnection()
resolverInstance = &Resolver{
DB: db.DB, Repo: repository.GetJobRepository(),
}
})
}
// GetResolverInstance returns the Resolver created by Init.
// If Init has not been called yet, the failure is reported through log.Fatal.
func GetResolverInstance() *Resolver {
	if resolverInstance == nil {
		// The previous message named the authentication module (copy-paste
		// slip); name the component that is actually missing.
		log.Fatal("Graph resolver not initialized!")
	}
	return resolverInstance
}

View File

@@ -2,22 +2,18 @@ package graph
// This file will be automatically regenerated based on the schema, any resolver implementations
// will be copied through when generating and any unknown code will be moved to the end.
// Code generated by github.com/99designs/gqlgen version v0.17.66
// Code generated by github.com/99designs/gqlgen version v0.17.40
import (
"context"
"errors"
"fmt"
"regexp"
"slices"
"strconv"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
@@ -31,12 +27,15 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
return r.Repo.GetTags(&obj.ID)
}
// ConcurrentJobs is the resolver for the concurrentJobs field.
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
// FIXME: Make the hardcoded duration configurable
if obj.State == schema.JobStateRunning {
obj.Duration = int32(time.Now().Unix() - obj.StartTimeUnix)
}
if obj.Exclusive != 1 && obj.Duration > 600 {
return r.Repo.FindConcurrentJobs(ctx, obj)
}
@@ -44,72 +43,8 @@ func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*mod
return nil, nil
}
// Footprint is the resolver for the footprint field.
//
// Every key returned by FetchFootprint is split at its last underscore:
// the part before it becomes Name, the part after it becomes Stat. A key
// without an underscore yields an empty Name and the whole key as Stat.
func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.FootprintValue, error) {
	rawFootprint, err := r.Repo.FetchFootprint(obj)
	if err != nil {
		log.Warn("Error while fetching job footprint data")
		return nil, err
	}

	res := []*model.FootprintValue{}
	for key, value := range rawFootprint {
		metricName, stat := "", key
		if cut := strings.LastIndex(key, "_"); cut >= 0 {
			metricName, stat = key[:cut], key[cut+1:]
		}
		entry := &model.FootprintValue{
			Name:  metricName,
			Stat:  stat,
			Value: value,
		}
		res = append(res, entry)
	}

	return res, nil
}
// EnergyFootprint is the resolver for the energyFootprint field.
//
// Each metric returned by FetchEnergyFootprint is labelled with a hardware
// type derived from its name. The first matching pattern wins, in the order
// CPU, Accelerator, Memory, Core; anything else is labelled "Other".
func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*model.EnergyFootprintValue, error) {
	rawEnergyFootprint, err := r.Repo.FetchEnergyFootprint(obj)
	if err != nil {
		log.Warn("Error while fetching job energy footprint data")
		return nil, err
	}

	// Suboptimal: Nearly hardcoded metric name expectations.
	// Compiled once per call rather than once per metric.
	matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
	matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
	matchMem := regexp.MustCompile(`mem|Mem|MEM`)
	matchCore := regexp.MustCompile(`core|Core|CORE`)

	res := []*model.EnergyFootprintValue{}
	for name, value := range rawEnergyFootprint {
		var hwType string
		switch {
		case matchCpu.MatchString(name):
			hwType = "CPU"
		case matchAcc.MatchString(name):
			hwType = "Accelerator"
		case matchMem.MatchString(name):
			hwType = "Memory"
		case matchCore.MatchString(name):
			hwType = "Core"
		default:
			hwType = "Other"
		}

		res = append(res, &model.EnergyFootprintValue{
			Hardware: hwType,
			Metric:   name,
			Value:    value,
		})
	}

	return res, nil
}
// MetaData is the resolver for the metaData field.
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (any, error) {
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
return r.Repo.FetchMetadata(obj)
}
@@ -118,48 +53,24 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
}
// Name is the resolver for the name field.
func (r *metricValueResolver) Name(ctx context.Context, obj *schema.MetricValue) (*string, error) {
panic(fmt.Errorf("not implemented: Name - name"))
}
// CreateTag is the resolver for the createTag field.
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string, scope string) (*schema.Tag, error) {
user := repository.GetUserFromContext(ctx)
if user == nil {
return nil, fmt.Errorf("no user in context")
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name)
if err != nil {
log.Warn("Error while creating tag")
return nil, err
}
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
if user.HasRole(schema.RoleAdmin) && scope == "admin" ||
user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && scope == "global" ||
user.Username == scope {
// Create in DB
id, err := r.Repo.CreateTag(typeArg, name, scope)
if err != nil {
log.Warn("Error while creating tag")
return nil, err
}
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
} else {
log.Warnf("Not authorized to create tag with scope: %s", scope)
return nil, fmt.Errorf("Not authorized to create tag with scope: %s", scope)
}
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
}
// DeleteTag is the resolver for the deleteTag field.
func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
// This Uses ID string <-> ID string, removeTagFromList uses []string <-> []int
panic(fmt.Errorf("not implemented: DeleteTag - deleteTag"))
}
// AddTagsToJob is the resolver for the addTagsToJob field.
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
user := repository.GetUserFromContext(ctx)
if user == nil {
return nil, fmt.Errorf("no user in context")
}
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while adding tag to job")
@@ -168,32 +79,15 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
tags := []*schema.Tag{}
for _, tagId := range tagIds {
// Get ID
tid, err := strconv.ParseInt(tagId, 10, 64)
if err != nil {
log.Warn("Error while parsing tag id")
return nil, err
}
// Test Exists
_, _, tscope, exists := r.Repo.TagInfo(tid)
if !exists {
log.Warnf("Tag does not exist (ID): %d", tid)
return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
}
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
if user.HasRole(schema.RoleAdmin) && tscope == "admin" ||
user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && tscope == "global" ||
user.Username == tscope {
// Add to Job
if tags, err = r.Repo.AddTag(user, jid, tid); err != nil {
log.Warn("Error while adding tag")
return nil, err
}
} else {
log.Warnf("Not authorized to add tag: %d", tid)
return nil, fmt.Errorf("Not authorized to add tag: %d", tid)
if tags, err = r.Repo.AddTag(jid, tid); err != nil {
log.Warn("Error while adding tag")
return nil, err
}
}
@@ -202,11 +96,6 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
user := repository.GetUserFromContext(ctx)
if user == nil {
return nil, fmt.Errorf("no user in context")
}
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while parsing job id")
@@ -215,77 +104,18 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
tags := []*schema.Tag{}
for _, tagId := range tagIds {
// Get ID
tid, err := strconv.ParseInt(tagId, 10, 64)
if err != nil {
log.Warn("Error while parsing tag id")
return nil, err
}
// Test Exists
_, _, tscope, exists := r.Repo.TagInfo(tid)
if !exists {
log.Warnf("Tag does not exist (ID): %d", tid)
return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
}
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
if user.HasRole(schema.RoleAdmin) && tscope == "admin" ||
user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && tscope == "global" ||
user.Username == tscope {
// Remove from Job
if tags, err = r.Repo.RemoveTag(user, jid, tid); err != nil {
log.Warn("Error while removing tag")
return nil, err
}
} else {
log.Warnf("Not authorized to remove tag: %d", tid)
return nil, fmt.Errorf("Not authorized to remove tag: %d", tid)
}
}
return tags, nil
}
// RemoveTagFromList is the resolver for the removeTagFromList field.
func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []string) ([]int, error) {
// Needs Contextuser
user := repository.GetUserFromContext(ctx)
if user == nil {
return nil, fmt.Errorf("no user in context")
}
tags := []int{}
for _, tagId := range tagIds {
// Get ID
tid, err := strconv.ParseInt(tagId, 10, 64)
if err != nil {
log.Warn("Error while parsing tag id for removal")
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
log.Warn("Error while removing tag")
return nil, err
}
// Test Exists
_, _, tscope, exists := r.Repo.TagInfo(tid)
if !exists {
log.Warnf("Tag does not exist (ID): %d", tid)
return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
}
// Test Access: Admins && Admin Tag OR Everyone && Private Tag
if user.HasRole(schema.RoleAdmin) && (tscope == "global" || tscope == "admin") || user.Username == tscope {
// Remove from DB
if err = r.Repo.RemoveTagById(tid); err != nil {
log.Warn("Error while removing tag")
return nil, err
} else {
tags = append(tags, int(tid))
}
} else {
log.Warnf("Not authorized to remove tag: %d", tid)
return nil, fmt.Errorf("Not authorized to remove tag: %d", tid)
}
}
return tags, nil
}
@@ -306,12 +136,7 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error)
// Tags is the resolver for the tags field.
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
return r.Repo.GetTags(repository.GetUserFromContext(ctx), nil)
}
// GlobalMetrics is the resolver for the globalMetrics field.
func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
return archive.GlobalMetricList, nil
return r.Repo.GetTags(nil)
}
// User is the resolver for the user field.
@@ -346,7 +171,7 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
return nil, err
}
job, err := r.Repo.FindById(ctx, numericId)
job, err := r.Repo.FindById(numericId)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
@@ -362,24 +187,14 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
}
// JobMetrics is the resolver for the jobMetrics field.
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
if resolution == nil { // Load from Config
if config.Keys.EnableResampling != nil {
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
resolution = &defaultRes
} else { // Set 0 (Loads configured metric timestep)
defaultRes := 0
resolution = &defaultRes
}
}
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
job, err := r.Query().Job(ctx, id)
if err != nil {
log.Warn("Error while querying job for metrics")
return nil, err
}
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
if err != nil {
log.Warn("Error while loading job data")
return nil, err
@@ -399,72 +214,8 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
return res, err
}
// JobStats is the resolver for the jobStats field.
//
// It looks up the job by id, loads its statistics for the requested metrics
// via metricDataDispatcher.LoadJobStats and returns one entry per metric.
func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []string) ([]*model.JobStats, error) {
	job, err := r.Query().Job(ctx, id)
	if err != nil {
		log.Warnf("Error while querying job %s for metadata", id)
		return nil, err
	}

	data, err := metricDataDispatcher.LoadJobStats(job, metrics, ctx)
	if err != nil {
		log.Warnf("Error while loading jobStats data for job id %s", id)
		return nil, err
	}

	res := []*model.JobStats{}
	for name, md := range data {
		// Take the address of a per-iteration copy: before Go 1.22 the range
		// variable is reused, so &md would make every entry point at the
		// value of the last iteration.
		stats := md
		res = append(res, &model.JobStats{
			Name:  name,
			Stats: &stats,
		})
	}

	return res, nil
}
// ScopedJobStats is the resolver for the scopedJobStats field.
//
// The data returned by metricDataDispatcher.LoadScopedJobStats is flattened
// into one JobStatsWithScope entry per (metric name, scope) pair.
func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobStatsWithScope, error) {
	job, err := r.Query().Job(ctx, id)
	if err != nil {
		log.Warnf("Error while querying job %s for metadata", id)
		return nil, err
	}

	data, err := metricDataDispatcher.LoadScopedJobStats(job, metrics, scopes, ctx)
	if err != nil {
		log.Warnf("Error while loading scopedJobStats data for job id %s", id)
		return nil, err
	}

	res := make([]*model.JobStatsWithScope, 0)
	for metricName, byScope := range data {
		for scope, scopeStats := range byScope {
			entry := &model.JobStatsWithScope{
				Name:  metricName,
				Scope: scope,
				Stats: make([]*model.ScopedStats, 0),
			}
			for _, s := range scopeStats {
				converted := &model.ScopedStats{
					Hostname: s.Hostname,
					ID:       s.Id,
					Data:     s.Data,
				}
				entry.Stats = append(entry.Stats, converted)
			}
			res = append(res, entry)
		}
	}

	return res, nil
}
// JobsFootprints is the resolver for the jobsFootprints field.
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
// NOTE: Legacy Naming! This resolver is for normalized histograms in analysis view only - *Not* related to DB "footprint" column!
return r.jobsFootprints(ctx, filter, metrics)
}
@@ -489,39 +240,14 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
return nil, err
}
// Note: Even if App-Default 'config.Keys.UiDefaults["job_list_usePaging"]' is set, always return hasNextPage boolean.
// Users can decide in frontend to use continuous scroll, even if app-default is paging!
/*
Example Page 4 @ 10 IpP : Does item 41 exist?
Minimal Page 41 @ 1 IpP : If len(result) is 1, Page 5 @ 10 IpP exists.
*/
nextPage := &model.PageRequest{
ItemsPerPage: 1,
Page: ((page.Page * page.ItemsPerPage) + 1),
}
nextJobs, err := r.Repo.QueryJobs(ctx, filter, nextPage, order)
if err != nil {
log.Warn("Error while querying next jobs")
return nil, err
}
hasNextPage := false
if len(nextJobs) == 1 {
hasNextPage = true
}
return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
return &model.JobResultList{Items: jobs, Count: &count}, nil
}
// JobsStatistics is the resolver for the jobsStatistics field.
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) ([]*model.JobsStatistics, error) {
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
var err error
var stats []*model.JobsStatistics
// Top Level Defaults
var defaultDurationBins string = "1h"
var defaultMetricBins int = 10
if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
if groupBy == nil {
@@ -555,13 +281,8 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
}
if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {
if numDurationBins == nil {
numDurationBins = &defaultDurationBins
}
if groupBy == nil {
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0], numDurationBins)
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0])
if err != nil {
return nil, err
}
@@ -571,13 +292,8 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
}
if requireField(ctx, "histMetrics") {
if numMetricBins == nil {
numMetricBins = &defaultMetricBins
}
if groupBy == nil {
stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0], numMetricBins)
stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0])
if err != nil {
return nil, err
}
@@ -597,8 +313,8 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
// NodeMetrics is the resolver for the nodeMetrics field.
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
user := repository.GetUserFromContext(ctx)
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
return nil, errors.New("you need to be administrator or support staff for this query")
if user != nil && !user.HasRole(schema.RoleAdmin) {
return nil, errors.New("you need to be an administrator for this query")
}
if metrics == nil {
@@ -607,9 +323,9 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
}
}
data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
log.Warn("error while loading node data")
log.Warn("Error while loading node data")
return nil, err
}
@@ -619,10 +335,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
Host: hostname,
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
if err != nil {
log.Warnf("error in nodeMetrics resolver: %s", err)
}
host.SubCluster, _ = archive.GetSubClusterByNode(cluster, hostname)
for metric, scopedMetrics := range metrics {
for _, scopedMetric := range scopedMetrics {
@@ -640,68 +353,6 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
return nodeMetrics, nil
}
// NodeMetricsList is the resolver for the nodeMetricsList field.
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
if resolution == nil { // Load from Config
if config.Keys.EnableResampling != nil {
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
resolution = &defaultRes
} else { // Set 0 (Loads configured metric timestep)
defaultRes := 0
resolution = &defaultRes
}
}
user := repository.GetUserFromContext(ctx)
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
return nil, errors.New("you need to be administrator or support staff for this query")
}
if metrics == nil {
for _, mc := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, mc.Name)
}
}
data, totalNodes, hasNextPage, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, *resolution, from, to, page, ctx)
if err != nil {
log.Warn("error while loading node data")
return nil, err
}
nodeMetricsList := make([]*model.NodeMetrics, 0, len(data))
for hostname, metrics := range data {
host := &model.NodeMetrics{
Host: hostname,
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
if err != nil {
log.Warnf("error in nodeMetrics resolver: %s", err)
}
for metric, scopedMetrics := range metrics {
for scope, scopedMetric := range scopedMetrics {
host.Metrics = append(host.Metrics, &model.JobMetricWithName{
Name: metric,
Scope: scope,
Metric: scopedMetric,
})
}
}
nodeMetricsList = append(nodeMetricsList, host)
}
nodeMetricsListResult := &model.NodesResultList{
Items: nodeMetricsList,
TotalNodes: &totalNodes,
HasNextPage: &hasNextPage,
}
return nodeMetricsListResult, nil
}
// NumberOfNodes is the resolver for the numberOfNodes field.
func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
nodeList, err := archive.ParseNodeList(obj.Nodes)
@@ -717,9 +368,6 @@ func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver
// Job returns generated.JobResolver implementation.
func (r *Resolver) Job() generated.JobResolver {
	return &jobResolver{Resolver: r}
}

// MetricValue returns generated.MetricValueResolver implementation.
func (r *Resolver) MetricValue() generated.MetricValueResolver {
	return &metricValueResolver{Resolver: r}
}

// Mutation returns generated.MutationResolver implementation.
func (r *Resolver) Mutation() generated.MutationResolver {
	return &mutationResolver{Resolver: r}
}
@@ -731,7 +379,6 @@ func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subCluste
// Resolver wrapper types: each one embeds *Resolver, so the methods and
// fields of the root Resolver are available on every wrapper.
type (
	clusterResolver     struct{ *Resolver }
	jobResolver         struct{ *Resolver }
	metricValueResolver struct{ *Resolver }
	mutationResolver    struct{ *Resolver }
	queryResolver       struct{ *Resolver }
	subClusterResolver  struct{ *Resolver }
)

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -11,7 +11,7 @@ import (
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -24,8 +24,8 @@ func (r *queryResolver) rooflineHeatmap(
ctx context.Context,
filter []*model.JobFilter,
rows int, cols int,
minX float64, minY float64, maxX float64, maxY float64,
) ([][]float64, error) {
minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
log.Error("Error while querying jobs for roofline")
@@ -47,14 +47,7 @@ func (r *queryResolver) rooflineHeatmap(
continue
}
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
// resolution := 0
// for _, mc := range metricConfigs {
// resolution = max(resolution, mc.Timestep)
// }
jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
if err != nil {
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
return nil, err
@@ -127,7 +120,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
continue
}
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Error("Error while loading averages for footprint")
return nil, err
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -8,9 +8,9 @@ import (
"bytes"
"encoding/json"
"fmt"
"math"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
@@ -42,8 +42,8 @@ func HandleImportFlag(flag string) error {
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
job := schema.JobMeta{BaseJob: schema.JobDefaults}
if err = dec.Decode(&job); err != nil {
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
if err = dec.Decode(&jobMeta); err != nil {
log.Warn("Error while decoding raw json metadata for import")
return err
}
@@ -67,68 +67,32 @@ func HandleImportFlag(flag string) error {
return err
}
job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
// checkJobData(&jobData)
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return err
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
// if _, err = r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
// if err != nil {
// log.Warn("Error while finding job in jobRepository")
// return err
// }
//
// return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist")
// }
//
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
job.Footprint = make(map[string]float64)
for _, fp := range sc.Footprint {
statType := "avg"
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}
name := fmt.Sprintf("%s_%s", fp, statType)
job.Footprint[name] = repository.LoadJobStat(&job, fp, statType)
}
job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
log.Warn("Error while marshaling job footprint")
return err
}
job.EnergyFootprint = make(map[string]float64)
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(&job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
}
job.EnergyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
}
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
return err
}
// TODO: Other metrics...
job.LoadAvg = loadJobStat(&jobMeta, "cpu_load")
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
job.MemUsedMax = loadJobStat(&jobMeta, "mem_used")
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
@@ -146,7 +110,7 @@ func HandleImportFlag(flag string) error {
return err
}
if err = archive.GetHandle().ImportJob(&job, &jobData); err != nil {
if err = archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
log.Error("Error while importing job")
return err
}
@@ -158,8 +122,8 @@ func HandleImportFlag(flag string) error {
}
for _, tag := range job.Tags {
if err := r.ImportTag(id, tag.Type, tag.Name, tag.Scope); err != nil {
log.Error("Error while adding or creating tag on import")
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
log.Error("Error while adding or creating tag")
return err
}
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -45,9 +45,6 @@ func setup(t *testing.T) *repository.JobRepository {
"jwts": {
"max-age": "2m"
},
"apiAllowedIPs": [
"*"
],
"clusters": [
{
"name": "testcluster",
@@ -85,7 +82,7 @@ func setup(t *testing.T) *repository.JobRepository {
if err := os.Mkdir(jobarchive, 0777); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
t.Fatal(err)
}
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -7,7 +7,6 @@ package importer
import (
"encoding/json"
"fmt"
"math"
"strings"
"time"
@@ -17,11 +16,6 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
const (
addTagQuery = "INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)"
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
)
// Delete the tables "job", "tag" and "jobtag" from the database and
// repopulate them using the jobs found in `archive`.
func InitDB() error {
@@ -66,66 +60,13 @@ func InitDB() error {
StartTimeUnix: jobMeta.StartTime,
}
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return err
}
job.Footprint = make(map[string]float64)
for _, fp := range sc.Footprint {
statType := "avg"
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}
name := fmt.Sprintf("%s_%s", fp, statType)
job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
}
job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
log.Warn("Error while marshaling job footprint")
return err
}
job.EnergyFootprint = make(map[string]float64)
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
job.EnergyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
}
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return err
}
// TODO: Other metrics...
job.LoadAvg = loadJobStat(jobMeta, "cpu_load")
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
job.MemUsedMax = loadJobStat(jobMeta, "mem_used")
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
@@ -147,8 +88,7 @@ func InitDB() error {
continue
}
id, err := r.TransactionAddNamed(t,
repository.NamedJobInsert, job)
id, err := r.TransactionAdd(t, job)
if err != nil {
log.Errorf("repository initDB(): %v", err)
errorOccured++
@@ -159,9 +99,7 @@ func InitDB() error {
tagstr := tag.Name + ":" + tag.Type
tagId, ok := tags[tagstr]
if !ok {
tagId, err = r.TransactionAdd(t,
addTagQuery,
tag.Name, tag.Type)
tagId, err = r.TransactionAddTag(t, tag)
if err != nil {
log.Errorf("Error adding tag: %v", err)
errorOccured++
@@ -170,9 +108,7 @@ func InitDB() error {
tags[tagstr] = tagId
}
r.TransactionAdd(t,
setTagQuery,
id, tagId)
r.TransactionSetTag(t, id, tagId)
}
if err == nil {
@@ -214,6 +150,18 @@ func SanityChecks(job *schema.BaseJob) error {
return nil
}
// loadJobStat returns one representative value for metric taken from
// job.Statistics: the maximum for "mem_used" and the average for every other
// metric. When the job carries no statistics for metric, 0.0 is returned.
func loadJobStat(job *schema.JobMeta, metric string) float64 {
	entry, found := job.Statistics[metric]
	if !found {
		return 0.0
	}

	// Memory usage is reported as a peak value, everything else as a mean.
	if metric == "mem_used" {
		return entry.Max
	}
	return entry.Avg
}
func checkJobData(d *schema.JobData) error {
for _, scopes := range *d {
// var newUnit schema.Unit

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

File diff suppressed because it is too large Load Diff

View File

@@ -1,383 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricDataDispatcher
import (
"context"
"fmt"
"math"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/resampler"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// cache is the package-level LRU cache that LoadData uses to memoize job
// metric data. It is created with a capacity argument of 128 * 1024 * 1024
// (presumably a size budget in bytes, since LoadData reports jd.Size() for
// each entry; confirm against the lrucache package).
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
// cacheKey builds the cache key under which the metric data of a job is
// stored. The key combines the job's database ID and state with the requested
// metrics, scopes and resolution.
//
// Duration and StartTime are deliberately left out: StartTime is less unique
// than job.ID, and the TTL of the cache entry ensures stale data does not stay
// cached forever.
func cacheKey(job *schema.Job, metrics []string, scopes []schema.MetricScope, resolution int) string {
	const keyFormat = "%d(%s):[%v],[%v]-%d"

	key := fmt.Sprintf(keyFormat, job.ID, job.State, metrics, scopes, resolution)
	return key
}
// LoadData fetches the metric data for a job, going through the package-level
// LRU cache (entries are keyed by cacheKey).
//
// Data source:
//   - If the job is running, its monitoring status is RunningOrArchiving, or
//     config.Keys.DisableArchive is set, the data is loaded from the metric
//     data repository configured for job.Cluster. In this branch a nil scopes
//     defaults to node scope and a nil metrics defaults to every metric listed
//     in the cluster's MetricConfig.
//   - Otherwise the data is loaded from the job archive, deep-copied,
//     resampled with the requested resolution and reduced to the requested
//     metrics and scopes.
//
// In both cases a statistics series is added to metrics with more than
// maxSeriesSize series, node scope is added for flops_any and mem_bw when node
// scope was requested, and the statistics are rounded.
//
// Errors raised inside the cache callback are returned in the callback's value
// slot (with ttl and size 0) and unwrapped once cache.Get has returned.
func LoadData(job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int,
) (schema.JobData, error) {
data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ interface{}, ttl time.Duration, size int) {
var jd schema.JobData
var err error
if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
config.Keys.DisableArchive {
// Live path: query the metric data repository of the job's cluster.
// Note that `err` is redeclared here and shadows the outer `err` for the
// remainder of this branch.
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
if err != nil {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}
// Default to node scope when the caller did not request any scope.
if scopes == nil {
scopes = append(scopes, schema.MetricScopeNode)
}
// Default to all metrics configured for the cluster.
if metrics == nil {
cluster := archive.GetCluster(job.Cluster)
for _, mc := range cluster.MetricConfig {
metrics = append(metrics, mc.Name)
}
}
jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution)
if err != nil {
// A non-empty result together with an error is treated as a partial
// failure: it is logged and the data that did arrive is used.
if len(jd) != 0 {
log.Warnf("partial error: %s", err.Error())
// return err, 0, 0 // Reactivating will block archiving on one partial error
} else {
log.Error("Error while loading job data from metric repository")
return err, 0, 0
}
}
size = jd.Size()
} else {
// Archive path: load the stored job data from the job archive.
var jd_temp schema.JobData
jd_temp, err = archive.GetHandle().LoadJobData(job)
if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0
}
// Deep copy the cached archive hashmap (presumably so that the resampling
// below does not modify the archive's own copy; confirm against DeepCopy).
jd = metricdata.DeepCopy(jd_temp)
// Resampling for archived data.
// Pass the resolution from frontend here.
for _, v := range jd {
for _, v_ := range v {
// timestep is overwritten by every series; the value returned for the
// last series is stored on the metric (0 if the metric has no series).
timestep := 0
for i := 0; i < len(v_.Series); i += 1 {
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
if err != nil {
return err, 0, 0
}
}
v_.Timestep = timestep
}
}
// Avoid sending unrequested data to the client:
if metrics != nil || scopes != nil {
// Only scopes were given: keep every metric present in the archive.
if metrics == nil {
metrics = make([]string, 0, len(jd))
for k := range jd {
metrics = append(metrics, k)
}
}
res := schema.JobData{}
for _, metric := range metrics {
if perscope, ok := jd[metric]; ok {
// Scopes are only narrowed when the metric has more than one scope.
// If none of the requested scopes exists, all scopes are kept.
if len(perscope) > 1 {
subset := make(map[schema.MetricScope]*schema.JobMetric)
for _, scope := range scopes {
if jm, ok := perscope[scope]; ok {
subset[scope] = jm
}
}
if len(subset) > 0 {
perscope = subset
}
}
res[metric] = perscope
}
}
jd = res
}
size = jd.Size()
}
// Finished jobs are cached for 5 hours, running jobs for 2 minutes only.
ttl = 5 * time.Hour
if job.State == schema.JobStateRunning {
ttl = 2 * time.Minute
}
// FIXME: Review: Is this really necessary or correct.
// Note: The post-processing below was formerly prepareJobData(jobData, scopes)
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/median/max Graph can be
// used instead of a lot of single lines.
// NOTE: New StatsSeries will always be calculated as 'min/median/max'
// Existing (archived) StatsSeries can be 'min/mean/max'!
const maxSeriesSize int = 15
// The loop variable `scopes` shadows the function parameter inside this loop.
for _, scopes := range jd {
for _, jm := range scopes {
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
continue
}
jm.AddStatisticsSeries()
}
}
// Here `scopes` refers to the function parameter again (possibly defaulted
// to node scope in the live path above).
nodeScopeRequested := false
for _, scope := range scopes {
if scope == schema.MetricScopeNode {
nodeScopeRequested = true
}
}
if nodeScopeRequested {
jd.AddNodeScope("flops_any")
jd.AddNodeScope("mem_bw")
}
// Round Resulting Stat Values
jd.RoundMetricStats()
return jd, ttl, size
})
// The callback reports failures by returning an error as its value.
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
return data.(schema.JobData), nil
}
// LoadAverages appends one value per requested metric to data; it backs the
// jobsFootprint GraphQL query. TODO: Rename/Generalize.
//
// For jobs that are not running (and unless the archive is disabled) the work
// is delegated to archive.LoadAveragesFromArchive. Otherwise the statistics
// come from the metric data repository of the job's cluster: data[i] receives
// the sum of the per-node averages of metrics[i], or schema.NaN when the
// repository returned nothing for that metric.
func LoadAverages(
	job *schema.Job,
	metrics []string,
	data [][]schema.Float,
	ctx context.Context,
) error {
	fromArchive := job.State != schema.JobStateRunning && !config.Keys.DisableArchive
	if fromArchive {
		// #166 change also here?
		return archive.LoadAveragesFromArchive(job, metrics, data)
	}

	repo, repoErr := metricdata.GetMetricDataRepo(job.Cluster)
	if repoErr != nil {
		return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
	}

	// #166 how to handle stats for acc normalization?
	stats, statsErr := repo.LoadStats(job, metrics, ctx)
	if statsErr != nil {
		log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
		return statsErr
	}

	for idx, name := range metrics {
		// Metrics without statistics are reported as NaN.
		value := schema.NaN
		if perNode, found := stats[name]; found {
			total := 0.0
			for _, nodeStats := range perNode {
				total += nodeStats.Avg
			}
			value = schema.Float(total)
		}
		data[idx] = append(data[idx], value)
	}

	return nil
}
// LoadScopedJobStats returns scoped statistics by metric; it backs the
// statsTable in the frontend.
//
// Jobs that are not running are answered from the job archive unless
// config.Keys.DisableArchive is set. In all other cases the statistics are
// requested from the metric data repository configured for the job's cluster.
func LoadScopedJobStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context,
) (schema.ScopedJobStats, error) {
	archived := job.State != schema.JobStateRunning
	if archived && !config.Keys.DisableArchive {
		return archive.LoadScopedStatsFromArchive(job, metrics, scopes)
	}

	repo, repoErr := metricdata.GetMetricDataRepo(job.Cluster)
	if repoErr != nil {
		return nil, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster)
	}

	result, loadErr := repo.LoadScopedStats(job, metrics, scopes, ctx)
	if loadErr != nil {
		log.Errorf("error while loading scoped statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project)
		return nil, loadErr
	}

	return result, nil
}
// LoadJobStats aggregates the per-node statistics of a job into a single
// min/avg/max triple per metric; it backs the polar plots in the frontend.
//
// Jobs that are not running are answered from the job archive unless
// config.Keys.DisableArchive is set. Otherwise the per-node statistics are
// requested from the metric data repository of the job's cluster and reduced
// as follows:
//   - Min is the smallest per-node minimum,
//   - Max is the largest per-node maximum,
//   - Avg is the sum of the per-node averages divided by job.NumNodes.
//
// All values are rounded to two decimals. Metrics for which the repository
// returned no node statistics are reported as all zeros.
//
// On error the (possibly empty) result map is returned together with the
// error.
func LoadJobStats(
	job *schema.Job,
	metrics []string,
	ctx context.Context,
) (map[string]schema.MetricStatistics, error) {
	if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
		return archive.LoadStatsFromArchive(job, metrics)
	}

	data := make(map[string]schema.MetricStatistics, len(metrics))
	repo, err := metricdata.GetMetricDataRepo(job.Cluster)
	if err != nil {
		return data, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster)
	}

	stats, err := repo.LoadStats(job, metrics, ctx)
	if err != nil {
		log.Errorf("error while loading statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project)
		return data, err
	}

	for _, m := range metrics {
		nodes, ok := stats[m]
		if !ok || len(nodes) == 0 {
			// Nothing to aggregate: report zeros instead of sentinel values.
			data[m] = schema.MetricStatistics{Min: 0.0, Avg: 0.0, Max: 0.0}
			continue
		}

		// Seed the extrema with +Inf/-Inf so that the first node always wins.
		// Seeding both with 0.0 would pin Min at 0 for positive-only metrics
		// and Max at 0 for negative-only ones.
		sum := 0.0
		lo, hi := math.Inf(1), math.Inf(-1)
		for _, node := range nodes {
			sum += node.Avg
			lo = math.Min(lo, node.Min)
			hi = math.Max(hi, node.Max)
		}

		// Average over the job's node count; fall back to the number of nodes
		// that actually reported when NumNodes is not set, to avoid dividing
		// by zero.
		divisor := float64(job.NumNodes)
		if job.NumNodes <= 0 {
			divisor = float64(len(nodes))
		}

		data[m] = schema.MetricStatistics{
			Avg: (math.Round((sum/divisor)*100) / 100),
			Min: (math.Round(lo*100) / 100),
			Max: (math.Round(hi*100) / 100),
		}
	}

	return data, nil
}
// LoadNodeData serves the classic node/system view. It returns a map of nodes
// to a map of metrics, loaded from the metric data repository configured for
// cluster. A nil metrics selects every metric in the cluster's MetricConfig.
//
// An error accompanied by a non-empty result is treated as partial: it is
// logged as a warning and the data is still returned. A nil result without an
// error is reported as an unsupported query.
func LoadNodeData(
	cluster string,
	metrics, nodes []string,
	scopes []schema.MetricScope,
	from, to time.Time,
	ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
	repo, repoErr := metricdata.GetMetricDataRepo(cluster)
	if repoErr != nil {
		return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
	}

	if metrics == nil {
		for _, cfg := range archive.GetCluster(cluster).MetricConfig {
			metrics = append(metrics, cfg.Name)
		}
	}

	nodeData, loadErr := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
	if loadErr != nil {
		if len(nodeData) == 0 {
			log.Error("Error while loading node data from metric repository")
			return nil, loadErr
		}
		log.Warnf("partial error: %s", loadErr.Error())
	}

	if nodeData == nil {
		return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
	}

	return nodeData, nil
}
// LoadNodeListData loads paged per-node job data from the metric data
// repository configured for cluster. It returns the data keyed by node, the
// total node count and the has-next-page flag as reported by the repository.
// A nil metrics selects every metric in the cluster's MetricConfig.
//
// An error accompanied by a non-empty result is treated as partial: it is
// logged as a warning and the data is still returned. Metrics that have no
// statistics series yet and carry at least maxSeriesSize series get one added.
// A nil result without an error is reported as an unsupported query.
func LoadNodeListData(
	cluster, subCluster, nodeFilter string,
	metrics []string,
	scopes []schema.MetricScope,
	resolution int,
	from, to time.Time,
	page *model.PageRequest,
	ctx context.Context,
) (map[string]schema.JobData, int, bool, error) {
	repo, repoErr := metricdata.GetMetricDataRepo(cluster)
	if repoErr != nil {
		return nil, 0, false, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
	}

	if metrics == nil {
		for _, cfg := range archive.GetCluster(cluster).MetricConfig {
			metrics = append(metrics, cfg.Name)
		}
	}

	data, totalNodes, hasNextPage, loadErr := repo.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, resolution, from, to, page, ctx)
	if loadErr != nil {
		if len(data) == 0 {
			log.Error("Error while loading node data from metric repository")
			return nil, totalNodes, hasNextPage, loadErr
		}
		log.Warnf("partial error: %s", loadErr.Error())
	}

	// NOTE: New StatsSeries will always be calculated as 'min/median/max'
	const maxSeriesSize int = 8
	for _, nodeData := range data {
		for _, perScope := range nodeData {
			for _, jm := range perScope {
				if jm.StatisticsSeries == nil && len(jm.Series) >= maxSeriesSize {
					jm.AddStatisticsSeries()
				}
			}
		}
	}

	if data == nil {
		return nil, totalNodes, hasNextPage, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
	}

	return data, totalNodes, hasNextPage, nil
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -10,12 +10,9 @@ import (
"encoding/json"
"errors"
"fmt"
"math"
"sort"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -63,10 +60,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int) (schema.JobData, error) {
log.Infof("InfluxDB 2 Backend: Resolution Scaling not Implemented, will return default timestep. Requested Resolution %d", resolution)
ctx context.Context) (schema.JobData, error) {
measurementsConds := make([]string, 0, len(metrics))
for _, m := range metrics {
@@ -90,7 +84,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
query := ""
switch scope {
case "node":
// Get Finest Granularity, Groupy By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows <-- Resolution could be added here?
// Get Finest Granularity, Groupy By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows
// log.Info("Scope 'node' requested. ")
query = fmt.Sprintf(`
from(bucket: "%s")
@@ -120,12 +114,6 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// idb.bucket,
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
// measurementsCond, hostsCond)
case "hwthread":
log.Info(" Scope 'hwthread' requested, but not yet supported: Will return 'node' scope only. ")
continue
case "accelerator":
log.Info(" Scope 'accelerator' requested, but not yet supported: Will return 'node' scope only. ")
continue
default:
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
continue
@@ -183,11 +171,6 @@ func (idb *InfluxDBv2DataRepository) LoadData(
}
case "socket":
continue
case "accelerator":
continue
case "hwthread":
// See below @ core
continue
case "core":
continue
// Include Series.Id in hostSeries
@@ -316,53 +299,6 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
return stats, nil
}
// LoadScopedStats collects the per-host statistics used by the job view's
// stats table.
// UNTESTED
//
// The data is obtained through idb.LoadData with node scope only, so only
// schema.MetricScopeNode can be answered: every other requested scope is
// skipped, and a note about it is logged once. For each metric, one
// ScopedStats entry per series is returned, carrying the series' hostname and
// a pointer to its statistics.
//
// Returns an error if the underlying LoadData call fails.
func (idb *InfluxDBv2DataRepository) LoadScopedStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context) (schema.ScopedJobStats, error) {

	// Assumption: idb.loadData() only returns series node-scope - use node scope for statsTable
	scopedJobStats := make(schema.ScopedJobStats)
	data, err := idb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
	if err != nil {
		log.Warn("Error while loading job for scopedJobStats")
		return nil, err
	}

	for metric, metricData := range data {
		for _, scope := range scopes {
			if scope != schema.MetricScopeNode {
				logOnce.Do(func() {
					log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
				})
				continue
			}

			if _, ok := scopedJobStats[metric]; !ok {
				scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
			}

			if _, ok := scopedJobStats[metric][scope]; !ok {
				scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
			}

			// A metric may come back without node-scope data; leave its list
			// empty instead of dereferencing a nil *JobMetric.
			jm := metricData[scope]
			if jm == nil {
				continue
			}

			// Index into the slice rather than taking the address of the range
			// variable: before Go 1.22 the range variable is reused, so every
			// entry would end up pointing at the statistics of the last series.
			for i := range jm.Series {
				scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
					Hostname: jm.Series[i].Hostname,
					Data:     &jm.Series[i].Statistics,
				})
			}
		}
	}

	return scopedJobStats, nil
}
// Used in Systems-View @ Node-Overview
// UNTESTED
func (idb *InfluxDBv2DataRepository) LoadNodeData(
cluster string,
metrics, nodes []string,
@@ -370,206 +306,8 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
// Note: scopes[] Array will be ignored, only return node scope
// TODO : Implement to be used in Analysis- und System/Node-View
log.Infof("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes)
// CONVERT ARGS TO INFLUX
measurementsConds := make([]string, 0)
for _, m := range metrics {
measurementsConds = append(measurementsConds, fmt.Sprintf(`r["_measurement"] == "%s"`, m))
}
measurementsCond := strings.Join(measurementsConds, " or ")
hostsConds := make([]string, 0)
if nodes == nil {
var allNodes []string
subClusterNodeLists := archive.NodeLists[cluster]
for _, nodeList := range subClusterNodeLists {
allNodes = append(nodes, nodeList.PrintList()...)
}
for _, node := range allNodes {
nodes = append(nodes, node)
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
}
} else {
for _, node := range nodes {
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
}
}
hostsCond := strings.Join(hostsConds, " or ")
// BUILD AND PERFORM QUERY
query := fmt.Sprintf(`
from(bucket: "%s")
|> range(start: %s, stop: %s)
|> filter(fn: (r) => (%s) and (%s) )
|> drop(columns: ["_start", "_stop"])
|> group(columns: ["hostname", "_measurement"])
|> aggregateWindow(every: 60s, fn: mean)
|> drop(columns: ["_time"])`,
idb.bucket,
idb.formatTime(from), idb.formatTime(to),
measurementsCond, hostsCond)
rows, err := idb.queryClient.Query(ctx, query)
if err != nil {
log.Error("Error while performing query")
return nil, err
}
// HANDLE QUERY RETURN
// Collect Float Arrays for Node@Metric -> No Scope Handling!
influxData := make(map[string]map[string][]schema.Float)
for rows.Next() {
row := rows.Record()
host, field := row.ValueByKey("hostname").(string), row.Measurement()
influxHostData, ok := influxData[host]
if !ok {
influxHostData = make(map[string][]schema.Float)
influxData[host] = influxHostData
}
influxFieldData, ok := influxData[host][field]
if !ok {
influxFieldData = make([]schema.Float, 0)
influxData[host][field] = influxFieldData
}
val, ok := row.Value().(float64)
if ok {
influxData[host][field] = append(influxData[host][field], schema.Float(val))
} else {
influxData[host][field] = append(influxData[host][field], schema.Float(0))
}
}
// BUILD FUNCTION RETURN
data := make(map[string]map[string][]*schema.JobMetric)
for node, metricData := range influxData {
nodeData, ok := data[node]
if !ok {
nodeData = make(map[string][]*schema.JobMetric)
data[node] = nodeData
}
for metric, floatArray := range metricData {
avg, min, max := 0.0, 0.0, 0.0
for _, val := range floatArray {
avg += float64(val)
min = math.Min(min, float64(val))
max = math.Max(max, float64(val))
}
stats := schema.MetricStatistics{
Avg: (math.Round((avg/float64(len(floatArray)))*100) / 100),
Min: (math.Round(min*100) / 100),
Max: (math.Round(max*100) / 100),
}
mc := archive.GetMetricConfig(cluster, metric)
nodeData[metric] = append(nodeData[metric], &schema.JobMetric{
Unit: mc.Unit,
Timestep: mc.Timestep,
Series: []schema.Series{
{
Hostname: node,
Statistics: stats,
Data: floatArray,
},
},
})
}
}
return data, nil
}
// LoadNodeListData returns the node-scope metric data for one page of the
// (optionally filtered) node list of a cluster or of a single subcluster.
// Used in Systems-View @ Node-List
// UNTESTED
//
// Parameters:
//   - cluster, subCluster: select the node list from archive.NodeLists; an
//     empty subCluster selects the nodes of all subclusters of the cluster.
//   - nodeFilter: if non-empty, only nodes whose name contains it are kept.
//   - metrics, scopes, from, to, ctx: forwarded to idb.LoadNodeData.
//   - resolution: accepted for interface compatibility, not used here.
//   - page: 1-based page request. A nil page or a non-positive ItemsPerPage
//     disables paging; a Page below 1 is treated as the first page.
//
// Returns the data per hostname, the number of nodes matching the filter
// (before paging), whether a further page exists, and an error.
func (idb *InfluxDBv2DataRepository) LoadNodeListData(
	cluster, subCluster, nodeFilter string,
	metrics []string,
	scopes []schema.MetricScope,
	resolution int,
	from, to time.Time,
	page *model.PageRequest,
	ctx context.Context,
) (map[string]schema.JobData, int, bool, error) {
	// Assumption: idb.loadData() only returns series node-scope - use node scope for NodeList

	// 0) Init additional vars
	var totalNodes int = 0
	var hasNextPage bool = false

	// 1) Get list of all nodes
	var nodes []string
	if subCluster != "" {
		scNodes := archive.NodeLists[cluster][subCluster]
		nodes = scNodes.PrintList()
	} else {
		subClusterNodeLists := archive.NodeLists[cluster]
		for _, nodeList := range subClusterNodeLists {
			nodes = append(nodes, nodeList.PrintList()...)
		}
	}

	// 2) Filter nodes
	if nodeFilter != "" {
		filteredNodes := []string{}
		for _, node := range nodes {
			if strings.Contains(node, nodeFilter) {
				filteredNodes = append(filteredNodes, node)
			}
		}
		nodes = filteredNodes
	}

	// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after return ...
	totalNodes = len(nodes)
	sort.Strings(nodes)

	// 3) Apply paging
	if page != nil && page.ItemsPerPage > 0 && len(nodes) > page.ItemsPerPage {
		start := (page.Page - 1) * page.ItemsPerPage
		if start < 0 {
			// Page numbers below 1 would yield a negative slice index.
			start = 0
		}
		if start >= len(nodes) {
			// Requested page lies behind the last node: nothing to load.
			return make(map[string]schema.JobData), totalNodes, false, nil
		}
		end := start + page.ItemsPerPage
		if end >= len(nodes) {
			// A page ending exactly on the last node has no successor.
			end = len(nodes)
			hasNextPage = false
		} else {
			hasNextPage = true
		}
		nodes = nodes[start:end]
	}

	// 4) Fetch And Convert Data, use idb.LoadNodeData() for query
	rawNodeData, err := idb.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
	if err != nil {
		log.Error(fmt.Sprintf("Error while loading influx nodeData for nodeListData %#v\n", err))
		return nil, totalNodes, hasNextPage, err
	}

	data := make(map[string]schema.JobData)
	for node, nodeData := range rawNodeData {
		// Init Nested Map Data Structures If Not Found
		hostData, ok := data[node]
		if !ok {
			hostData = make(schema.JobData)
			data[node] = hostData
		}

		for metric, nodeMetricData := range nodeData {
			if len(nodeMetricData) == 0 {
				// No series for this metric: indexing [0] below would panic.
				continue
			}
			metricData, ok := hostData[metric]
			if !ok {
				metricData = make(map[schema.MetricScope]*schema.JobMetric)
				hostData[metric] = metricData
			}
			metricData[schema.MetricScopeNode] = nodeMetricData[0] // Only Node Scope Returned from loadNodeData
		}
	}

	return data, totalNodes, hasNextPage, nil
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -8,11 +8,13 @@ import (
"context"
"encoding/json"
"fmt"
"math"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@@ -22,24 +24,22 @@ type MetricDataRepository interface {
Init(rawConfig json.RawMessage) error
// Return the JobData for the given job, only with the requested metrics.
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope only.
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
// Return a map of metrics to a map of scopes to the scoped metric statistics of the job.
LoadScopedStats(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.ScopedJobStats, error)
// Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node.
// Return a map of hosts to a map of metrics at the requested scopes for that node.
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
// Return a map of hosts to a map of metrics to a map of scopes for multiple nodes.
LoadNodeListData(cluster, subCluster, nodeFilter string, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, page *model.PageRequest, ctx context.Context) (map[string]schema.JobData, int, bool, error)
}
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
func Init() error {
var useArchive bool
func Init(disableArchive bool) error {
useArchive = !disableArchive
for _, cluster := range config.Keys.Clusters {
if cluster.MetricDataRepository != nil {
var kind struct {
@@ -74,13 +74,283 @@ func Init() error {
return nil
}
func GetMetricDataRepo(cluster string) (MetricDataRepository, error) {
var err error
repo, ok := metricDataRepos[cluster]
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
if !ok {
err = fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
// Fetches the metric data for a job.
//
// The result is looked up in (and stored into) the package-level LRU cache
// under cacheKey(job, metrics, scopes). The closure passed to cache.Get
// computes the value and reports a TTL and a size for the cache entry.
//
// Data source:
//   - metric data repository of job.Cluster, if the job is running, its
//     monitoring status is "running or archiving", or the archive is disabled;
//   - the job archive otherwise.
//
// A nil metrics slice means "all metrics", a nil scopes slice is replaced by
// node scope when the metric data repository is queried.
//
// Errors are handed back through the cache as the computed value (with TTL 0
// and size 0) and unwrapped again after cache.Get returns.
func LoadData(job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
var jd schema.JobData
var err error
// Live path: query the configured metric data repository.
if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
!useArchive {
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}
// Default to node scope if the caller did not request any scope.
if scopes == nil {
scopes = append(scopes, schema.MetricScopeNode)
}
// Default to every metric configured for the job's cluster.
if metrics == nil {
cluster := archive.GetCluster(job.Cluster)
for _, mc := range cluster.MetricConfig {
metrics = append(metrics, mc.Name)
}
}
jd, err = repo.LoadData(job, metrics, scopes, ctx)
if err != nil {
// An error together with non-empty data is treated as a partial
// result: it is only logged and the data is used.
if len(jd) != 0 {
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading job data from metric repository")
return err, 0, 0
}
}
size = jd.Size()
} else {
// Archive path: load the stored job data and filter it afterwards.
jd, err = archive.GetHandle().LoadJobData(job)
if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0
}
// Avoid sending unrequested data to the client:
if metrics != nil || scopes != nil {
// Only scopes were given: keep all metrics found in the archive.
if metrics == nil {
metrics = make([]string, 0, len(jd))
for k := range jd {
metrics = append(metrics, k)
}
}
res := schema.JobData{}
for _, metric := range metrics {
if perscope, ok := jd[metric]; ok {
// Scope filtering is only attempted if more than one scope is
// stored; if none of the requested scopes is present, all
// stored scopes of the metric are kept.
if len(perscope) > 1 {
subset := make(map[schema.MetricScope]*schema.JobMetric)
for _, scope := range scopes {
if jm, ok := perscope[scope]; ok {
subset[scope] = jm
}
}
if len(subset) > 0 {
perscope = subset
}
}
res[metric] = perscope
}
}
jd = res
}
size = jd.Size()
}
// Running jobs still change, so their cache entries expire much sooner.
ttl = 5 * time.Hour
if job.State == schema.JobStateRunning {
ttl = 2 * time.Minute
}
prepareJobData(job, jd, scopes)
return jd, ttl, size
})
// The closure returns errors as the cached value; unwrap them here.
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
// NOTE(review): the next line uses 'repo' and 'err', which are not declared
// in this scope; it looks like a leftover of GetMetricDataRepo - confirm and remove.
return repo, err
return data.(schema.JobData), nil
}
// LoadAverages appends one value per requested metric to the matching row of
// data: the sum of the per-node averages of the job, or NaN if no statistics
// are available for that metric.
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
//
// Finished jobs are served from the job archive when it is enabled; all other
// jobs are served from the metric data repository of the job's cluster.
func LoadAverages(
	job *schema.Job,
	metrics []string,
	data [][]schema.Float,
	ctx context.Context) error {

	if useArchive && job.State != schema.JobStateRunning {
		return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
	}

	repo, configured := metricDataRepos[job.Cluster]
	if !configured {
		return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
	}

	stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalization?
	if err != nil {
		log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
		return err
	}

	for idx, name := range metrics {
		// NaN marks metrics the repository returned no statistics for.
		var value schema.Float = schema.NaN
		if perNode, found := stats[name]; found {
			total := 0.0
			for host := range perNode {
				total += perNode[host].Avg
			}
			value = schema.Float(total)
		}
		data[idx] = append(data[idx], value)
	}

	return nil
}
// LoadNodeData queries the metric data repository of the given cluster for
// node data in the interval [from, to].
// Used for the node/system view. Returns a map of nodes to a map of metrics.
//
// A nil metrics slice selects every metric configured for the cluster. An
// error that comes together with non-empty data is logged as a partial error
// and the data is still returned.
func LoadNodeData(
	cluster string,
	metrics, nodes []string,
	scopes []schema.MetricScope,
	from, to time.Time,
	ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {

	repo, configured := metricDataRepos[cluster]
	if !configured {
		return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
	}

	if metrics == nil {
		configs := archive.GetCluster(cluster).MetricConfig
		for i := range configs {
			metrics = append(metrics, configs[i].Name)
		}
	}

	nodeData, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
	switch {
	case err != nil && len(nodeData) == 0:
		log.Error("Error while loading node data from metric repository")
		return nil, err
	case err != nil:
		// Some data arrived despite the error: report it and carry on.
		log.Warnf("partial error: %s", err.Error())
	}

	if nodeData == nil {
		return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
	}

	return nodeData, nil
}
// cacheKey builds the LRU cache key for the job data of one job, restricted
// to the given metrics and scopes.
func cacheKey(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope) string {
	// StartTime and Duration are left out on purpose: StartTime is less
	// unique than job.ID, and the TTL of the cache entry ensures that it
	// does not stay in the cache forever.
	const keyFormat = "%d(%s):[%v],[%v]"
	return fmt.Sprintf(keyFormat, job.ID, job.State, metrics, scopes)
}
// prepareJobData post-processes freshly loaded job data in place.
//
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node', so both are added at node scope
// whenever node scope was requested. If a job has a lot of nodes, a
// statisticsSeries is added so that a min/mean/max graph can be used instead
// of a lot of single lines.
func prepareJobData(
	job *schema.Job,
	jobData schema.JobData,
	scopes []schema.MetricScope) {

	// Metrics with more series than this get a statistics series.
	const maxSeriesSize int = 15

	for _, perScope := range jobData {
		for _, metric := range perScope {
			if metric.StatisticsSeries == nil && len(metric.Series) > maxSeriesSize {
				metric.AddStatisticsSeries()
			}
		}
	}

	for _, requested := range scopes {
		if requested == schema.MetricScopeNode {
			jobData.AddNodeScope("flops_any")
			jobData.AddNodeScope("mem_bw")
			return
		}
	}
}
// Writes a running job to the job-archive
//
// ArchiveJob loads all metrics configured for the job's cluster (node scope,
// plus core scope for jobs with at most 8 nodes), derives per-metric job
// statistics from the node-scope series and builds the JobMeta structure.
//
// If the file based archive is disabled only the JobMeta is returned;
// otherwise the job is additionally imported into the archive and the error
// of that import is returned alongside the JobMeta.
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
	allMetrics := make([]string, 0)
	metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
	for _, mc := range metricConfigs {
		allMetrics = append(allMetrics, mc.Name)
	}

	// TODO: Talk about this! What resolutions to store data at...
	scopes := []schema.MetricScope{schema.MetricScopeNode}
	if job.NumNodes <= 8 {
		scopes = append(scopes, schema.MetricScopeCore)
	}

	jobData, err := LoadData(job, allMetrics, scopes, ctx)
	if err != nil {
		log.Error("Error while loading job data for archiving")
		return nil, err
	}

	jobMeta := &schema.JobMeta{
		BaseJob:    job.BaseJob,
		StartTime:  job.StartTime.Unix(),
		Statistics: make(map[string]schema.JobStatistics),
	}

	for metric, data := range jobData {
		avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
		nodeData, ok := data["node"]
		if !ok || nodeData == nil {
			// TODO/FIXME: Calc average for non-node metrics as well!
			continue
		}

		for _, series := range nodeData.Series {
			avg += series.Statistics.Avg
			min = math.Min(min, series.Statistics.Min)
			max = math.Max(max, series.Statistics.Max)
		}

		// Look the metric config up once; it can be missing for metrics that
		// are not configured for this cluster, in which case the unit stays
		// empty instead of dereferencing a nil config.
		var unit schema.Unit
		if mc := archive.GetMetricConfig(job.Cluster, metric); mc != nil {
			unit = schema.Unit{
				Prefix: mc.Unit.Prefix,
				Base:   mc.Unit.Base,
			}
		} else {
			log.Warnf("ArchiveJob: metric %s for cluster %s not configured, storing statistics without unit", metric, job.Cluster)
		}

		jobMeta.Statistics[metric] = schema.JobStatistics{
			Unit: unit,
			Avg:  avg / float64(job.NumNodes),
			Min:  min,
			Max:  max,
		}
	}

	// If the file based archive is disabled,
	// only return the JobMeta structure as the
	// statistics in there are needed.
	if !useArchive {
		return jobMeta, nil
	}

	return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
}

View File

@@ -20,7 +20,6 @@ import (
"text/template"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -167,10 +166,10 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
var rt http.RoundTripper = nil
if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
prom_pw := promcfg.Secret(prom_pw)
rt = promcfg.NewBasicAuthRoundTripper(promcfg.NewInlineSecret(config.Username), promcfg.NewInlineSecret(string(prom_pw)), promapi.DefaultRoundTripper)
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
} else {
if config.Username != "" {
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set")
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
}
}
// init client
@@ -205,8 +204,8 @@ func (pdb *PrometheusDataRepository) FormatQuery(
metric string,
scope schema.MetricScope,
nodes []string,
cluster string,
) (string, error) {
cluster string) (string, error) {
args := PromQLArgs{}
if len(nodes) > 0 {
args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
@@ -234,13 +233,12 @@ func (pdb *PrometheusDataRepository) RowToSeries(
from time.Time,
step int64,
steps int64,
row *promm.SampleStream,
) schema.Series {
row *promm.SampleStream) schema.Series {
ts := from.Unix()
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
// init array of expected length with NaN
values := make([]schema.Float, steps+1)
for i := range values {
for i, _ := range values {
values[i] = schema.NaN
}
// copy recorded values from prom sample pair
@@ -265,9 +263,8 @@ func (pdb *PrometheusDataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int,
) (schema.JobData, error) {
ctx context.Context) (schema.JobData, error) {
// TODO respect requested scope
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
scopes = append(scopes, schema.MetricScopeNode)
@@ -309,6 +306,7 @@ func (pdb *PrometheusDataRepository) LoadData(
Step: time.Duration(metricConfig.Timestep * 1e9),
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
return nil, errors.New("Prometheus query error")
@@ -337,7 +335,7 @@ func (pdb *PrometheusDataRepository) LoadData(
pdb.RowToSeries(from, step, steps, row))
}
// only add metric if at least one host returned data
if !ok && len(jobMetric.Series) > 0 {
if !ok && len(jobMetric.Series) > 0{
jobData[metric][scope] = jobMetric
}
// sort by hostname to get uniform coloring
@@ -353,12 +351,12 @@ func (pdb *PrometheusDataRepository) LoadData(
func (pdb *PrometheusDataRepository) LoadStats(
job *schema.Job,
metrics []string,
ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
// map of metrics of nodes of stats
stats := map[string]map[string]schema.MetricStatistics{}
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
if err != nil {
log.Warn("Error while loading job for stats")
return nil, err
@@ -378,8 +376,7 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
t0 := time.Now()
// Map of hosts of metrics of value slices
data := make(map[string]map[string][]*schema.JobMetric)
@@ -414,6 +411,7 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
Step: time.Duration(metricConfig.Timestep * 1e9),
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
return nil, errors.New("Prometheus query error")
@@ -447,188 +445,3 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
return data, nil
}
// Implemented by NHR@FAU; Used in Job-View StatsTable
//
// LoadScopedStats returns, per metric and scope, the statistics of every
// series of the job. Only node scope is supported: any other requested scope
// is skipped (with a one-time info log). A metric for which no node-scope
// data was loaded yields an empty list for that scope.
func (pdb *PrometheusDataRepository) LoadScopedStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context) (schema.ScopedJobStats, error) {

	// Assumption: pdb.loadData() only returns series node-scope - use node scope for statsTable
	scopedJobStats := make(schema.ScopedJobStats)
	data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
	if err != nil {
		log.Warn("Error while loading job for scopedJobStats")
		return nil, err
	}

	for metric, metricData := range data {
		for _, scope := range scopes {
			if scope != schema.MetricScopeNode {
				logOnce.Do(func() {
					log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
				})
				continue
			}

			if _, ok := scopedJobStats[metric]; !ok {
				scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
			}

			if _, ok := scopedJobStats[metric][scope]; !ok {
				scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
			}

			// The metric may have been loaded without an entry for this
			// scope; a missing key yields a nil pointer that must not be
			// dereferenced.
			jobMetric := metricData[scope]
			if jobMetric == nil {
				continue
			}

			for i := range jobMetric.Series {
				// Copy into a variable local to this iteration so that every
				// entry gets its own statistics instead of a pointer into a
				// shared loop variable.
				stats := jobMetric.Series[i].Statistics
				scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
					Hostname: jobMetric.Series[i].Hostname,
					Data:     &stats,
				})
			}
		}
	}

	return scopedJobStats, nil
}
// Implemented by NHR@FAU; Used in NodeList-View
//
// LoadNodeListData returns the node-scope metric data for one page of the
// (optionally filtered) node list of a cluster or of a single subcluster.
//
// Parameters:
//   - cluster, subCluster: select the node list from archive.NodeLists; an
//     empty subCluster selects the nodes of all subclusters of the cluster.
//   - nodeFilter: if non-empty, only nodes whose name contains it are kept.
//   - metrics: metrics to query; each must be configured for the cluster.
//   - scopes: only node scope is supported and is always queried; other
//     scopes are skipped with a one-time info log.
//   - resolution: accepted for interface compatibility, not used here.
//   - page: 1-based page request. A nil page or a non-positive ItemsPerPage
//     disables paging; a Page below 1 is treated as the first page.
//
// Returns the data per hostname, the number of nodes matching the filter
// (before paging), whether a further page exists, and an error.
func (pdb *PrometheusDataRepository) LoadNodeListData(
	cluster, subCluster, nodeFilter string,
	metrics []string,
	scopes []schema.MetricScope,
	resolution int,
	from, to time.Time,
	page *model.PageRequest,
	ctx context.Context,
) (map[string]schema.JobData, int, bool, error) {
	// Assumption: pdb.loadData() only returns series node-scope - use node scope for NodeList

	// 0) Init additional vars
	var totalNodes int = 0
	var hasNextPage bool = false

	// 1) Get list of all nodes
	var nodes []string
	if subCluster != "" {
		scNodes := archive.NodeLists[cluster][subCluster]
		nodes = scNodes.PrintList()
	} else {
		subClusterNodeLists := archive.NodeLists[cluster]
		for _, nodeList := range subClusterNodeLists {
			nodes = append(nodes, nodeList.PrintList()...)
		}
	}

	// 2) Filter nodes
	if nodeFilter != "" {
		filteredNodes := []string{}
		for _, node := range nodes {
			if strings.Contains(node, nodeFilter) {
				filteredNodes = append(filteredNodes, node)
			}
		}
		nodes = filteredNodes
	}

	// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after return ...
	totalNodes = len(nodes)
	sort.Strings(nodes)

	// 3) Apply paging
	if page != nil && page.ItemsPerPage > 0 && len(nodes) > page.ItemsPerPage {
		start := (page.Page - 1) * page.ItemsPerPage
		if start < 0 {
			// Page numbers below 1 would yield a negative slice index.
			start = 0
		}
		if start >= len(nodes) {
			// Requested page lies behind the last node: nothing to load.
			return make(map[string]schema.JobData), totalNodes, false, nil
		}
		end := start + page.ItemsPerPage
		if end >= len(nodes) {
			// A page ending exactly on the last node has no successor.
			end = len(nodes)
			hasNextPage = false
		} else {
			hasNextPage = true
		}
		nodes = nodes[start:end]
	}

	// 4) Fetch Data, based on pdb.LoadNodeData()

	t0 := time.Now()
	// Map of hosts of jobData
	data := make(map[string]schema.JobData)

	// query db for each metric
	// TODO: scopes seems to be always empty
	if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
		scopes = append(scopes, schema.MetricScopeNode)
	}

	for _, scope := range scopes {
		if scope != schema.MetricScopeNode {
			logOnce.Do(func() {
				log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
			})
			continue
		}

		for _, metric := range metrics {
			metricConfig := archive.GetMetricConfig(cluster, metric)
			if metricConfig == nil {
				log.Warnf("Error in LoadNodeListData: Metric %s for cluster %s not configured", metric, cluster)
				return nil, totalNodes, hasNextPage, errors.New("Prometheus config error")
			}
			query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
			if err != nil {
				log.Warn("Error while formatting prometheus query")
				return nil, totalNodes, hasNextPage, err
			}

			// ranged query over all nodes
			r := promv1.Range{
				Start: from,
				End:   to,
				Step:  time.Duration(metricConfig.Timestep * 1e9),
			}
			result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
			if err != nil {
				log.Errorf("Prometheus query error in LoadNodeListData: %v\n", err)
				return nil, totalNodes, hasNextPage, errors.New("Prometheus query error")
			}
			if len(warnings) > 0 {
				log.Warnf("Warnings: %v\n", warnings)
			}

			// Checked assertion: an unexpected result type is reported as
			// an error instead of panicking.
			matrix, ok := result.(promm.Matrix)
			if !ok {
				log.Errorf("Prometheus query in LoadNodeListData returned unexpected result type %T", result)
				return nil, totalNodes, hasNextPage, errors.New("Prometheus query error")
			}

			step := int64(metricConfig.Timestep)
			steps := int64(to.Sub(from).Seconds()) / step

			// iter rows of host, metric, values
			for _, row := range matrix {
				hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)

				hostdata, ok := data[hostname]
				if !ok {
					hostdata = make(schema.JobData)
					data[hostname] = hostdata
				}

				metricdata, ok := hostdata[metric]
				if !ok {
					metricdata = make(map[schema.MetricScope]*schema.JobMetric)
					hostdata[metric] = metricdata
				}

				// output per host, metric and scope
				if _, ok := metricdata[scope]; !ok {
					metricdata[scope] = &schema.JobMetric{
						Unit:     metricConfig.Unit,
						Timestep: metricConfig.Timestep,
						Series:   []schema.Series{pdb.RowToSeries(from, step, steps, row)},
					}
				}
			}
		}
	}
	t1 := time.Since(t0)
	log.Debugf("LoadNodeListData of %v nodes took %s", len(data), t1)
	return data, totalNodes, hasNextPage, nil
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -9,11 +9,10 @@ import (
"encoding/json"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
panic("TODO")
}
@@ -28,25 +27,14 @@ func (tmdr *TestMetricDataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int) (schema.JobData, error) {
ctx context.Context) (schema.JobData, error) {
return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
return TestLoadDataCallback(job, metrics, scopes, ctx)
}
func (tmdr *TestMetricDataRepository) LoadStats(
job *schema.Job,
metrics []string,
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
panic("TODO")
}
func (tmdr *TestMetricDataRepository) LoadScopedStats(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.ScopedJobStats, error) {
metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
panic("TODO")
}
@@ -60,62 +48,3 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
panic("TODO")
}
// LoadNodeListData is a stub on the test repository: it ignores all of its
// arguments and always panics with "TODO".
func (tmdr *TestMetricDataRepository) LoadNodeListData(
cluster, subCluster, nodeFilter string,
metrics []string,
scopes []schema.MetricScope,
resolution int,
from, to time.Time,
page *model.PageRequest,
ctx context.Context,
) (map[string]schema.JobData, int, bool, error) {
panic("TODO")
}
// DeepCopy returns a copy of jd_temp whose maps, JobMetric structs, series
// data slices and statistics series slices are newly allocated, so that
// modifying them in the copy does not modify jd_temp. Series IDs are copied
// by plain assignment. Nil JobMetric entries are kept as nil.
func DeepCopy(jd_temp schema.JobData) schema.JobData {
	jd := make(schema.JobData, len(jd_temp))
	for metric, perScope := range jd_temp {
		jd[metric] = make(map[schema.MetricScope]*schema.JobMetric, len(perScope))
		for scope, src := range perScope {
			if src == nil {
				// Nothing to copy; keep the key so both maps have the same shape.
				jd[metric][scope] = nil
				continue
			}

			dst := new(schema.JobMetric)
			dst.Series = make([]schema.Series, len(src.Series))
			for i := range src.Series {
				dst.Series[i].Data = make([]schema.Float, len(src.Series[i].Data))
				copy(dst.Series[i].Data, src.Series[i].Data)
				dst.Series[i].Hostname = src.Series[i].Hostname
				dst.Series[i].Id = src.Series[i].Id
				dst.Series[i].Statistics.Avg = src.Series[i].Statistics.Avg
				dst.Series[i].Statistics.Min = src.Series[i].Statistics.Min
				dst.Series[i].Statistics.Max = src.Series[i].Statistics.Max
			}

			dst.Timestep = src.Timestep
			dst.Unit.Base = src.Unit.Base
			dst.Unit.Prefix = src.Unit.Prefix

			if src.StatisticsSeries != nil {
				// Init Slices
				stats := new(schema.StatsSeries)
				stats.Max = make([]schema.Float, len(src.StatisticsSeries.Max))
				stats.Min = make([]schema.Float, len(src.StatisticsSeries.Min))
				stats.Median = make([]schema.Float, len(src.StatisticsSeries.Median))
				stats.Mean = make([]schema.Float, len(src.StatisticsSeries.Mean))

				// Copy Data
				copy(stats.Max, src.StatisticsSeries.Max)
				copy(stats.Min, src.StatisticsSeries.Min)
				copy(stats.Median, src.StatisticsSeries.Median)
				copy(stats.Mean, src.StatisticsSeries.Mean)

				// Handle Percentiles: the container has to be allocated before
				// it is filled, otherwise the first write panics.
				stats.Percentiles = copyPercentiles(src.StatisticsSeries.Percentiles)

				dst.StatisticsSeries = stats
			}

			jd[metric][scope] = dst
		}
	}
	return jd
}

// copyPercentiles returns a newly allocated copy of a percentile table,
// including its value slices; an empty or nil table is returned as nil.
// NOTE(review): assumes StatsSeries.Percentiles is a map of float slices -
// confirm against the schema package.
func copyPercentiles[K comparable](src map[K][]schema.Float) map[K][]schema.Float {
	if len(src) == 0 {
		return nil
	}
	dst := make(map[K][]schema.Float, len(src))
	for key, values := range src {
		dst[key] = make([]schema.Float, len(values))
		copy(dst[key], values)
	}
	return dst
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -59,15 +59,17 @@ func Connect(driver string, db string) {
} else {
dbHandle, err = sqlx.Open("sqlite3", opts.URL)
}
if err != nil {
log.Fatal(err)
}
case "mysql":
opts.URL += "?multiStatements=true"
dbHandle, err = sqlx.Open("mysql", opts.URL)
if err != nil {
log.Fatalf("sqlx.Open() error: %v", err)
}
default:
log.Abortf("DB Connection: Unsupported database driver '%s'.\n", driver)
}
if err != nil {
log.Abortf("DB Connection: Could not connect to '%s' database with sqlx.Open().\nError: %s\n", driver, err.Error())
log.Fatalf("unsupported database driver: %s", driver)
}
dbHandle.SetMaxOpenConns(opts.MaxOpenConnections)
@@ -78,7 +80,7 @@ func Connect(driver string, db string) {
dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
err = checkDBVersion(driver, dbHandle.DB)
if err != nil {
log.Abortf("DB Connection: Failed DB version check.\nError: %s\n", err.Error())
log.Fatal(err)
}
})
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

View File

@@ -1,21 +1,21 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"math"
"strconv"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -29,10 +29,14 @@ var (
)
type JobRepository struct {
DB *sqlx.DB
DB *sqlx.DB
driver string
stmtCache *sq.StmtCache
cache *lrucache.Cache
driver string
archiveChannel chan *schema.Job
archivePending sync.WaitGroup
}
func GetJobRepository() *JobRepository {
@@ -43,48 +47,47 @@ func GetJobRepository() *JobRepository {
DB: db.DB,
driver: db.Driver,
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
archiveChannel: make(chan *schema.Job, 128),
}
// start archiving worker
go jobRepoInstance.archivingWorker()
})
return jobRepoInstance
}
var jobColumns []string = []string{
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id",
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id",
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
"job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
"job.duration", "job.walltime", "job.resources", "job.mem_used_max", "job.flops_any_avg", "job.mem_bw_avg", "job.load_avg", // "job.meta_data",
}
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
job := &schema.Job{}
if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
&job.Duration, &job.Walltime, &job.RawResources, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.LoadAvg /*&job.RawMetaData*/); err != nil {
log.Warnf("Error while scanning rows (Job): %v", err)
return nil, err
}
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
log.Warn("Error while unmarshaling raw resources json")
log.Warn("Error while unmarhsaling raw resources json")
return nil, err
}
job.RawResources = nil
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
log.Warnf("Error while unmarshaling raw footprint json: %v", err)
return nil, err
}
job.RawFootprint = nil
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
// return nil, err
// }
job.StartTime = time.Unix(job.StartTimeUnix, 0)
// Always ensure accurate duration for running jobs
if job.State == schema.JobStateRunning {
if job.Duration == 0 && job.State == schema.JobStateRunning {
job.Duration = int32(time.Since(job.StartTime).Seconds())
}
job.RawResources = nil
return job, nil
}
@@ -203,78 +206,239 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
return err
}
if _, err = sq.Update("job").
Set("meta_data", job.RawMetaData).
Where("job.id = ?", job.ID).
RunWith(r.stmtCache).Exec(); err != nil {
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
return err
}
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
return archive.UpdateMetadata(job, job.MetaData)
return nil
}
func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
start := time.Now()
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) Find(
jobId *int64,
cluster *string,
startTime *int64) (*schema.Job, error) {
if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
log.Warn("Error while scanning for job footprint")
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
}
log.Debugf("Timer Find %s", time.Since(start))
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindAll(
jobId *int64,
cluster *string,
startTime *int64) ([]*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
}
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
if len(job.RawFootprint) == 0 {
jobs := make([]*schema.Job, 0, 10)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}
log.Debugf("Timer FindAll %s", time.Since(start))
return jobs, nil
}
// FindById looks up a single batch job by its database id (job.id).
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
	row := sq.Select(jobColumns...).
		From("job").
		Where("job.id = ?", jobId).
		RunWith(r.stmtCache).
		QueryRow()

	return scanJob(row)
}
func (r *JobRepository) FindConcurrentJobs(
ctx context.Context,
job *schema.Job) (*model.JobLinkResultList, error) {
if job == nil {
return nil, nil
}
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
log.Warn("Error while unmarshaling raw footprint json")
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
if qerr != nil {
return nil, qerr
}
query = query.Where("cluster = ?", job.Cluster)
var startTime int64
var stopTime int64
startTime = job.StartTimeUnix
hostname := job.Resources[0].Hostname
if job.State == schema.JobStateRunning {
stopTime = time.Now().Unix()
} else {
stopTime = startTime + int64(job.Duration)
}
// Add 200s overlap for jobs start time at the end
startTimeTail := startTime + 10
stopTimeTail := stopTime - 200
startTimeFront := startTime + 200
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
"running", startTimeTail, stopTimeTail, startTime)
queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while running query: %v", err)
return nil, err
}
log.Debugf("Timer FetchFootprint %s", time.Since(start))
return job.Footprint, nil
items := make([]*model.JobLink, 0, 10)
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
for rows.Next() {
var id, jobId, startTime sql.NullInt64
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
})
}
}
rows, err = queryRunning.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while running query: %v", err)
return nil, err
}
for rows.Next() {
var id, jobId, startTime sql.NullInt64
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
})
}
}
cnt := len(items)
return &model.JobLinkResultList{
ListQuery: &queryString,
Items: items,
Count: &cnt,
}, nil
}
func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) {
start := time.Now()
cachekey := fmt.Sprintf("energyFootprint:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.EnergyFootprint = cached.(map[string]float64)
return job.EnergyFootprint, nil
// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transferred!
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
}
if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil {
log.Warn("Error while scanning for job energy_footprint")
return nil, err
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
}
if len(job.RawEnergyFootprint) == 0 {
return nil, nil
res, err := r.DB.NamedExec(`INSERT INTO job (
job_id, user, project, cluster, subcluster, `+"`partition`"+`, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data
);`, job)
if err != nil {
return -1, err
}
if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil {
log.Warn("Error while unmarshaling raw energy footprint json")
return nil, err
}
return res.LastInsertId()
}
r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour)
log.Debugf("Timer FetchEnergyFootprint %s", time.Since(start))
return job.EnergyFootprint, nil
// Stop writes the final state of the job with database id jobId:
// job_state, duration and monitoring_status are set to the given values.
// The error of the UPDATE execution is returned as is.
func (r *JobRepository) Stop(
	jobId int64,
	duration int32,
	state schema.JobState,
	monitoringStatus int32) (err error) {
	// Restrict the update to the single row first, then list the columns.
	update := sq.Update("job").Where("job.id = ?", jobId)
	update = update.
		Set("job_state", state).
		Set("duration", duration).
		Set("monitoring_status", monitoringStatus)

	_, err = update.RunWith(r.stmtCache).Exec()
	return err
}
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
var cnt int
q := sq.Select("count(*)").From("job").Where("job.start_time < ?", startTime)
q.RunWith(r.DB).QueryRow().Scan(cnt)
qd := sq.Delete("job").Where("job.start_time < ?", startTime)
_, err := qd.RunWith(r.DB).Exec()
qs := fmt.Sprintf("SELECT count(*) FROM job WHERE job.start_time < %d", startTime)
err := r.DB.Get(&cnt, qs) //ignore error as it will also occur in delete statement
_, err = r.DB.Exec(`DELETE FROM job WHERE job.start_time < ?`, startTime)
if err != nil {
s, _, _ := qd.ToSql()
log.Errorf(" DeleteJobsBefore(%d) with %s: error %#v", startTime, s, err)
log.Errorf(" DeleteJobsBefore(%d): error %#v", startTime, err)
} else {
log.Debugf("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
}
@@ -282,34 +446,128 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
}
func (r *JobRepository) DeleteJobById(id int64) error {
qd := sq.Delete("job").Where("job.id = ?", id)
_, err := qd.RunWith(r.DB).Exec()
_, err := r.DB.Exec(`DELETE FROM job WHERE job.id = ?`, id)
if err != nil {
s, _, _ := qd.ToSql()
log.Errorf("DeleteJobById(%d) with %s : error %#v", id, s, err)
log.Errorf("DeleteJobById(%d): error %#v", id, err)
} else {
log.Debugf("DeleteJobById(%d): Success", id)
}
return err
}
// UpdateMonitoringStatus sets the monitoring_status column of the job row
// whose database id equals job and returns the error of the UPDATE, if any.
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
	_, err = sq.Update("job").
		Set("monitoring_status", monitoringStatus).
		Where("job.id = ?", job).
		RunWith(r.stmtCache).
		Exec()
	return err
}
// MarkArchived updates the job with the database id jobId after archiving:
// it sets monitoring_status and, for every known metric in metricStats,
// copies one statistic into the matching job column (the maximum for
// "mem_used", the average for all others). Unknown metrics are only logged.
// Note that "load" and "cpu_load" both write to the load_avg column.
func (r *JobRepository) MarkArchived(
	jobId int64,
	monitoringStatus int32,
	metricStats map[string]schema.JobStatistics) error {
	update := sq.Update("job").
		Set("monitoring_status", monitoringStatus).
		Where("job.id = ?", jobId)

	for name, st := range metricStats {
		var (
			column string
			value  interface{}
		)

		switch name {
		case "flops_any":
			column, value = "flops_any_avg", st.Avg
		case "mem_used":
			column, value = "mem_used_max", st.Max
		case "mem_bw":
			column, value = "mem_bw_avg", st.Avg
		case "load", "cpu_load":
			column, value = "load_avg", st.Avg
		case "net_bw":
			column, value = "net_bw_avg", st.Avg
		case "file_bw":
			column, value = "file_bw_avg", st.Avg
		default:
			log.Debugf("MarkArchived() Metric '%v' unknown", name)
			continue
		}

		update = update.Set(column, value)
	}

	_, err := update.RunWith(r.stmtCache).Exec()
	if err == nil {
		return nil
	}

	log.Warn("Error while marking job as archived")
	return err
}
// archivingWorker is the archiving worker loop. It receives jobs from
// r.archiveChannel and, for each job, loads its metadata, archives it via
// metricdata.ArchiveJob and finally marks it as archived in the database.
//
// The loop ends when r.archiveChannel is closed and drained. Every received
// job decrements r.archivePending exactly once, regardless of whether
// archiving succeeded, so that WaitForArchiving cannot block forever on a
// failed job (TriggerArchiving adds one per queued job).
func (r *JobRepository) archivingWorker() {
	// Ranging over the channel stops once it is closed. A `break` inside a
	// `select` would only leave the select and keep spinning on the closed
	// channel.
	for job := range r.archiveChannel {
		func() {
			// Balance the Add(1) done in TriggerArchiving on every path.
			defer r.archivePending.Done()

			start := time.Now()
			// not using meta data, called to load JobMeta into Cache?
			// will fail if job meta not in repository
			if _, err := r.FetchMetadata(job); err != nil {
				log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
				r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
				return
			}

			// metricdata.ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
			// TODO: Maybe use context with cancel/timeout here
			jobMeta, err := metricdata.ArchiveJob(job, context.Background())
			if err != nil {
				log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
				r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
				return
			}

			// Update the jobs database entry one last time:
			if err := r.MarkArchived(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
				log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
				return
			}

			log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
			log.Printf("archiving job (dbid: %d) successful", job.ID)
		}()
	}
}
// Trigger async archiving
// TriggerArchiving registers one pending operation on r.archivePending and
// then sends job on r.archiveChannel.
// NOTE(review): whether the send can block depends on how archiveChannel is
// created, which is not visible here.
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
// Count the job as pending before it is handed to the channel.
r.archivePending.Add(1)
r.archiveChannel <- job
}
// Wait for background thread to finish pending archiving operations
// WaitForArchiving blocks on r.archivePending.Wait(), i.e. until every
// operation registered by TriggerArchiving has been marked as done.
func (r *JobRepository) WaitForArchiving() {
// NOTE(review): this function does not close archiveChannel; it only waits
// on archivePending (presumably a sync.WaitGroup).
r.archivePending.Wait()
}
func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
return searchterm, "", "", ""
} else { // Has to have letters and logged-in user for other guesses
if user != nil {
// Find username by username in job table (match)
uresult, _ := r.FindColumnValue(user, searchterm, "job", "hpc_user", "hpc_user", false)
// Find username in jobs (match)
uresult, _ := r.FindColumnValue(user, searchterm, "job", "user", "user", false)
if uresult != "" {
return "", uresult, "", ""
}
// Find username by real name in hpc_user table (like)
nresult, _ := r.FindColumnValue(user, searchterm, "hpc_user", "username", "name", true)
// Find username by name (like)
nresult, _ := r.FindColumnValue(user, searchterm, "user", "username", "name", true)
if nresult != "" {
return "", nresult, "", ""
}
// Find projectId by projectId in job table (match)
// Find projectId in jobs (match)
presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false)
if presult != "" {
return "", "", presult, ""
@@ -320,10 +578,8 @@ func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm
}
}
var (
ErrNotFound = errors.New("no such jobname, project or user")
ErrForbidden = errors.New("not authorized")
)
var ErrNotFound = errors.New("no such jobname, project or user")
var ErrForbidden = errors.New("not authorized")
func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, table string, selectColumn string, whereColumn string, isLike bool) (result string, err error) {
compareStr := " = ?"
@@ -391,7 +647,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
return nil, 0, 1000
}
@@ -407,6 +663,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
// AllocatedNodes returns a map of all subclusters to a map of hostnames to the amount of jobs running on that host.
// Hosts with zero jobs running on them will not show up!
func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) {
start := time.Now()
subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job").
@@ -448,8 +705,8 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
return subclusters, nil
}
// FIXME: Set duration to requested walltime?
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now()
res, err := sq.Update("job").
Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
@@ -477,47 +734,8 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
return nil
}
// FindRunningJobs returns all jobs of the given cluster that are in state
// 'running' and whose stored duration is greater than 600.
//
// The cluster name is passed as a bound parameter instead of being formatted
// into the SQL text, so it cannot alter the statement. The result set is
// always closed, and an iteration error reported by rows.Err() is returned
// instead of a silently truncated list.
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
	query := sq.Select(jobColumns...).From("job").
		Where("job.cluster = ?", cluster).
		Where("job.job_state = 'running'").
		Where("job.duration > 600")

	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Error("Error while running query")
		return nil, err
	}
	// Release the underlying connection on every return path.
	defer rows.Close()

	jobs := make([]*schema.Job, 0, 50)
	for rows.Next() {
		job, err := scanJob(rows)
		if err != nil {
			log.Warn("Error while scanning rows")
			return nil, err
		}
		jobs = append(jobs, job)
	}
	// rows.Next() returning false may also mean the iteration failed.
	if err := rows.Err(); err != nil {
		log.Warn("Error while iterating rows")
		return nil, err
	}

	log.Infof("Return job count %d", len(jobs))
	return jobs, nil
}
// UpdateDuration recomputes the duration column of every job whose
// job_state is 'running' as (current UNIX time - job.start_time).
// The error of the UPDATE execution is returned, nil on success.
func (r *JobRepository) UpdateDuration() error {
	now := time.Now().Unix()

	update := sq.Update("job").
		Set("duration", sq.Expr("? - job.start_time", now)).
		Where("job_state = 'running'")

	_, err := update.RunWith(r.stmtCache).Exec()
	return err
}
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
var query sq.SelectBuilder
if startTimeBegin == startTimeEnd || startTimeBegin > startTimeEnd {
@@ -555,118 +773,27 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
return jobs, nil
}
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
stmt := sq.Update("job").
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", job)
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`
_, err = stmt.RunWith(r.stmtCache).Exec()
return
}
// Execute runs the given update statement through r.stmtCache and returns
// the execution error, or nil on success. The statement result is discarded.
func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
	_, err := stmt.RunWith(r.stmtCache).Exec()
	return err
}
// MarkArchived extends stmt with an assignment of monitoringStatus to the
// monitoring_status column and returns the extended builder. Nothing is
// executed here.
func (r *JobRepository) MarkArchived(
	stmt sq.UpdateBuilder,
	monitoringStatus int32,
) sq.UpdateBuilder {
	withStatus := stmt.Set("monitoring_status", monitoringStatus)
	return withStatus
}
func (r *JobRepository) UpdateEnergy(
stmt sq.UpdateBuilder,
jobMeta *schema.JobMeta,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
res, err := r.DB.NamedExec(NamedJobInsert, job)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return stmt, err
log.Warn("Error while NamedJobInsert")
return 0, err
}
energyFootprint := make(map[string]float64)
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
energyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
// log.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), energy, jobMeta.JobID, totalEnergy)
}
var rawFootprint []byte
if rawFootprint, err = json.Marshal(energyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return stmt, err
}
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil
}
func (r *JobRepository) UpdateFootprint(
stmt sq.UpdateBuilder,
jobMeta *schema.JobMeta,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
id, err := res.LastInsertId()
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return stmt, err
}
footprint := make(map[string]float64)
for _, fp := range sc.Footprint {
var statType string
for _, gm := range archive.GlobalMetricList {
if gm.Name == fp {
statType = gm.Footprint
}
}
if statType != "avg" && statType != "min" && statType != "max" {
log.Warnf("unknown statType for footprint update: %s", statType)
return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
}
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}
name := fmt.Sprintf("%s_%s", fp, statType)
footprint[name] = LoadJobStat(jobMeta, fp, statType)
log.Warn("Error while getting last insert ID")
return 0, err
}
var rawFootprint []byte
if rawFootprint, err = json.Marshal(footprint); err != nil {
log.Warnf("Error while marshaling footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return stmt, err
}
return stmt.Set("footprint", string(rawFootprint)), nil
return id, nil
}

View File

@@ -1,75 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"encoding/json"
"fmt"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
// NamedJobInsert is the INSERT statement for a row of the job table.
// It uses named parameters (":job_id", ":hpc_user", ...) and is executed via
// r.DB.NamedExec in InsertJob; each parameter name matches the column it
// fills. The list covers identification, sizing, state and timing columns as
// well as footprint, energy, energy_footprint, resources and meta_data.
const NamedJobInsert string = `INSERT INTO job (
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES (
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);`
// InsertJob executes NamedJobInsert with job as the named-parameter source
// and returns the id reported by LastInsertId for the new row.
// On any failure it logs a warning and returns 0 together with the error.
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
	result, execErr := r.DB.NamedExec(NamedJobInsert, job)
	if execErr != nil {
		log.Warn("Error while NamedJobInsert")
		return 0, execErr
	}

	newID, idErr := result.LastInsertId()
	if idErr == nil {
		return newID, nil
	}

	log.Warn("Error while getting last insert ID")
	return 0, idErr
}
// Start inserts a new job in the table, returning the unique job ID.
// Before the insert, the Footprint, Resources and MetaData fields of job are
// JSON-encoded into RawFootprint, RawResources and RawMetaData. If one of
// the encodings fails, -1 and a wrapped error are returned and nothing is
// inserted.
// Statistics are not transferred!
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
	if job.RawFootprint, err = json.Marshal(job.Footprint); err != nil {
		return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
	}

	if job.RawResources, err = json.Marshal(job.Resources); err != nil {
		return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
	}

	if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
		return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
	}

	return r.InsertJob(job)
}
// Stop writes the final state of the job with database id jobId:
// job_state, duration and monitoring_status are set to the given values.
// The error of the UPDATE execution is returned as is.
func (r *JobRepository) Stop(
	jobId int64,
	duration int32,
	state schema.JobState,
	monitoringStatus int32,
) (err error) {
	// Restrict the update to the single row first, then list the columns.
	update := sq.Update("job").Where("job.id = ?", jobId)
	update = update.
		Set("job_state", state).
		Set("duration", duration).
		Set("monitoring_status", monitoringStatus)

	_, err = update.RunWith(r.stmtCache).Exec()
	return err
}

View File

@@ -1,263 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"database/sql"
"fmt"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id and, if given (non-nil), the
// cluster name and the start time of the job in UNIX epoch time seconds.
// If more than one job matches, the one with the highest database id wins.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
//
// jobId is mandatory: a nil jobId yields an error instead of a nil pointer
// dereference.
func (r *JobRepository) Find(
	jobId *int64,
	cluster *string,
	startTime *int64,
) (*schema.Job, error) {
	if jobId == nil {
		return nil, fmt.Errorf("REPOSITORY/JOB > Find called without job id")
	}

	start := time.Now()
	q := sq.Select(jobColumns...).From("job").
		Where("job.job_id = ?", *jobId)

	if cluster != nil {
		q = q.Where("job.cluster = ?", *cluster)
	}
	if startTime != nil {
		q = q.Where("job.start_time = ?", *startTime)
	}

	q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match

	// Run the query before logging the timer, so the measurement covers the
	// database round trip and not only the query construction.
	job, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
	log.Debugf("Timer Find %s", time.Since(start))
	return job, err
}
// FindAll executes a SQL query to find all batch jobs matching the criteria.
// Jobs are queried using the batch job id and, if given (non-nil), the
// cluster name and the start time of the job in UNIX epoch time seconds.
// It returns a slice of pointers to schema.Job data structures and an error
// variable. If no job matches, the slice is empty and the error is nil.
//
// jobId is mandatory: a nil jobId yields an error instead of a nil pointer
// dereference. The result set is closed on every return path and iteration
// errors reported by rows.Err() are returned.
func (r *JobRepository) FindAll(
	jobId *int64,
	cluster *string,
	startTime *int64,
) ([]*schema.Job, error) {
	if jobId == nil {
		return nil, fmt.Errorf("REPOSITORY/JOB > FindAll called without job id")
	}

	start := time.Now()
	q := sq.Select(jobColumns...).From("job").
		Where("job.job_id = ?", *jobId)

	if cluster != nil {
		q = q.Where("job.cluster = ?", *cluster)
	}
	if startTime != nil {
		q = q.Where("job.start_time = ?", *startTime)
	}

	rows, err := q.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Error("Error while running query")
		return nil, err
	}
	// Release the underlying connection on every return path.
	defer rows.Close()

	jobs := make([]*schema.Job, 0, 10)
	for rows.Next() {
		job, err := scanJob(rows)
		if err != nil {
			log.Warn("Error while scanning rows")
			return nil, err
		}
		jobs = append(jobs, job)
	}
	// rows.Next() returning false may also mean the iteration failed.
	if err := rows.Err(); err != nil {
		log.Warn("Error while iterating rows")
		return nil, err
	}

	log.Debugf("Timer FindAll %s", time.Since(start))
	return jobs, nil
}
// FindById looks up a single job by its database id (job.id).
// The base query is passed through SecurityCheck with ctx before it is
// executed; if SecurityCheck fails, its error is returned and no query runs.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
	base := sq.Select(jobColumns...).From("job").Where("job.id = ?", jobId)

	checked, err := SecurityCheck(ctx, base)
	if err != nil {
		return nil, err
	}

	row := checked.RunWith(r.stmtCache).QueryRow()
	return scanJob(row)
}
// FindByIdWithUser looks up a single job by its database id (job.id).
// Unlike FindById, the user is handed over explicitly and not taken from a
// context: the base query is passed through SecurityCheckWithUser, and its
// error, if any, is returned without running the query.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
	base := sq.Select(jobColumns...).From("job").Where("job.id = ?", jobId)

	checked, err := SecurityCheckWithUser(user, base)
	if err != nil {
		return nil, err
	}

	row := checked.RunWith(r.stmtCache).QueryRow()
	return scanJob(row)
}
// FindByIdDirect looks up a single job by its database id (job.id).
// The query is executed as built; no SecurityCheck is applied here.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
	query := sq.Select(jobColumns...).From("job").Where("job.id = ?", jobId)
	row := query.RunWith(r.stmtCache).QueryRow()
	return scanJob(row)
}
// FindByJobId looks up a single job by its batch job id (job.job_id),
// cluster name and start time; all three must match.
// The base query is passed through SecurityCheck with ctx before it is
// executed; if SecurityCheck fails, its error is returned and no query runs.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
	base := sq.Select(jobColumns...).From("job")
	base = base.Where("job.job_id = ?", jobId)
	base = base.Where("job.cluster = ?", cluster)
	base = base.Where("job.start_time = ?", startTime)

	checked, err := SecurityCheck(ctx, base)
	if err != nil {
		return nil, err
	}

	row := checked.RunWith(r.stmtCache).QueryRow()
	return scanJob(row)
}
// IsJobOwner reports whether a job with the given batch job id, start time
// and cluster exists that belongs to user (job.hpc_user).
//
// It returns true only if such a row was found and its id could be read.
// Any error fails closed: sql.ErrNoRows (no such job for this user) and
// every other database error both yield false, so a failing query never
// grants ownership. Unexpected errors are logged.
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
	q := sq.Select("id").
		From("job").
		Where("job.job_id = ?", jobId).
		Where("job.hpc_user = ?", user).
		Where("job.cluster = ?", cluster).
		Where("job.start_time = ?", startTime)

	// Only the id column is selected, so scan exactly one value instead of
	// a full job row.
	var dbID int64
	if err := q.RunWith(r.stmtCache).QueryRow().Scan(&dbID); err != nil {
		if err != sql.ErrNoRows {
			log.Warnf("Error while checking owner of job %d: %v", jobId, err)
		}
		return false
	}

	return true
}
// FindConcurrentJobs collects links to jobs that ran on the same cluster and
// on the first host of job (job.Resources[0].Hostname) during an overlapping
// time window. The base query is restricted by SecurityCheck using ctx.
//
// Two queries are executed, one for jobs that are not running and one for
// running jobs; their results are concatenated in that order. The returned
// list also carries a query string of the form
// "cluster=<cluster>&jobId=<id>&jobId=<id>..." and the number of items.
//
// A nil job yields (nil, nil). A job without resources yields an error
// instead of an out-of-range panic. Both result sets are closed and checked
// for iteration errors.
func (r *JobRepository) FindConcurrentJobs(
	ctx context.Context,
	job *schema.Job,
) (*model.JobLinkResultList, error) {
	if job == nil {
		return nil, nil
	}
	if len(job.Resources) == 0 {
		return nil, fmt.Errorf("REPOSITORY/JOB > job (dbid: %d) has no resources, cannot search concurrent jobs", job.ID)
	}

	query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
	if qerr != nil {
		return nil, qerr
	}

	query = query.Where("cluster = ?", job.Cluster)

	var startTime int64 = job.StartTimeUnix
	var stopTime int64
	hostname := job.Resources[0].Hostname

	if job.State == schema.JobStateRunning {
		stopTime = time.Now().Unix()
	} else {
		stopTime = startTime + int64(job.Duration)
	}

	// Window borders used below: start + 10s, stop - 200s and start + 200s.
	startTimeTail := startTime + 10
	stopTimeTail := stopTime - 200
	startTimeFront := startTime + 200

	// Get At Least One Exact Hostname Match from JSON Resources Array in Database
	const hostMatch = "EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)"

	// Each argument is bound next to the placeholder it belongs to; the
	// resulting SQL text and argument order are unchanged.
	queryRunning := query.Where("job.job_state = ?", "running").
		Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
			startTimeTail, stopTimeTail, startTime).
		Where(hostMatch, hostname)

	query = query.Where("job.job_state != ?", "running").
		Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
			startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime).
		Where(hostMatch, hostname)

	items := make([]*model.JobLink, 0, 10)
	queryString := fmt.Sprintf("cluster=%s", job.Cluster)

	// collect runs one query and appends every row with a valid id to items
	// and to queryString.
	collect := func(q sq.SelectBuilder) error {
		rows, err := q.RunWith(r.stmtCache).Query()
		if err != nil {
			log.Errorf("Error while running query: %v", err)
			return err
		}
		defer rows.Close()

		for rows.Next() {
			var id, jobId, rowStart sql.NullInt64

			if err := rows.Scan(&id, &jobId, &rowStart); err != nil {
				log.Warn("Error while scanning rows")
				return err
			}

			if id.Valid {
				queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
				items = append(items,
					&model.JobLink{
						ID:    fmt.Sprint(id.Int64),
						JobID: int(jobId.Int64),
					})
			}
		}
		return rows.Err()
	}

	if err := collect(query); err != nil {
		return nil, err
	}
	if err := collect(queryRunning); err != nil {
		return nil, err
	}

	cnt := len(items)

	return &model.JobLinkResultList{
		ListQuery: &queryString,
		Items:     items,
		Count:     &cnt,
	}, nil
}

View File

@@ -1,342 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
// QueryJobs returns the jobs visible to the user in ctx (via SecurityCheck)
// that match all given filters.
//
// order, if non-nil, sorts either by a job column (order.Type == "col") or
// by a value inside the footprint JSON column (any other type); the field
// name is converted with toSnakeCase first. page, if non-nil and
// ItemsPerPage != -1, applies LIMIT/OFFSET with 1-based page numbers; page
// numbers below 1 are treated as the first page instead of underflowing the
// unsigned offset.
//
// The result set is closed on every return path and iteration errors
// reported by rows.Err() are returned.
func (r *JobRepository) QueryJobs(
	ctx context.Context,
	filters []*model.JobFilter,
	page *model.PageRequest,
	order *model.OrderByInput,
) ([]*schema.Job, error) {
	query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
	if qerr != nil {
		return nil, qerr
	}

	if order != nil {
		field := toSnakeCase(order.Field)
		if order.Type == "col" {
			// "col": Fixed column name query
			switch order.Order {
			case model.SortDirectionEnumAsc:
				query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
			case model.SortDirectionEnumDesc:
				query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
			default:
				return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
			}
		} else {
			// "foot": Order by footprint JSON field values
			// Verify and Search Only in Valid Jsons
			// NOTE(review): this validates meta_data although the ordering
			// below extracts from footprint - confirm whether
			// JSON_VALID(footprint) was intended.
			query = query.Where("JSON_VALID(meta_data)")
			switch order.Order {
			case model.SortDirectionEnumAsc:
				query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
			case model.SortDirectionEnumDesc:
				query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
			default:
				return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
			}
		}
	}

	if page != nil && page.ItemsPerPage != -1 {
		limit := uint64(page.ItemsPerPage)
		// Pages are 1-based; guard against Page < 1, which would wrap
		// around in the unsigned subtraction.
		var offset uint64
		if page.Page > 1 {
			offset = (uint64(page.Page) - 1) * limit
		}
		query = query.Offset(offset).Limit(limit)
	}

	for _, f := range filters {
		query = BuildWhereClause(f, query)
	}

	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		queryString, queryVars, _ := query.ToSql()
		log.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
		return nil, err
	}
	// Release the underlying connection on every return path.
	defer rows.Close()

	jobs := make([]*schema.Job, 0, 50)
	for rows.Next() {
		job, err := scanJob(rows)
		if err != nil {
			log.Warn("Error while scanning rows (Jobs)")
			return nil, err
		}
		jobs = append(jobs, job)
	}
	// rows.Next() returning false may also mean the iteration failed.
	if err := rows.Err(); err != nil {
		log.Warn("Error while iterating rows (Jobs)")
		return nil, err
	}

	return jobs, nil
}
// CountJobs returns the number of distinct jobs visible to the user in ctx
// (via SecurityCheck) that match all given filters.
// On a failed security check or query, 0 and the error are returned.
func (r *JobRepository) CountJobs(
	ctx context.Context,
	filters []*model.JobFilter,
) (int, error) {
	// DISTINCT count for tags filters, does not affect other queries
	base := sq.Select("count(DISTINCT job.id)").From("job")

	query, qerr := SecurityCheck(ctx, base)
	if qerr != nil {
		return 0, qerr
	}

	for i := range filters {
		query = BuildWhereClause(filters[i], query)
	}

	var total int
	err := query.RunWith(r.DB).Scan(&total)
	if err != nil {
		return 0, err
	}

	return total, nil
}
// SecurityCheckWithUser restricts query according to the roles of user and
// returns the (possibly extended) builder:
//   - a user whose only role is API, or any admin or support user:
//     query is returned unchanged (all jobs)
//   - manager: jobs of the managed projects plus the user's own jobs; only
//     the own jobs if no projects are defined
//   - user: only the user's own jobs
//
// A nil user or a user without any known role yields a zero-value builder
// and an error.
func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
	if user == nil {
		return sq.SelectBuilder{}, fmt.Errorf("user context is nil")
	}

	// Restriction shared by the manager-without-projects and plain-user cases.
	ownJobsOnly := func() sq.SelectBuilder {
		return query.Where("job.hpc_user = ?", user.Username)
	}

	// API-User : All jobs
	if len(user.Roles) == 1 && user.HasRole(schema.RoleApi) {
		return query, nil
	}

	// Admin & Support : All jobs
	if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
		return query, nil
	}

	// Manager : Add filter for managed projects' jobs only + personal jobs
	if user.HasRole(schema.RoleManager) {
		if len(user.Projects) == 0 {
			log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
			return ownJobsOnly(), nil
		}
		return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
	}

	// User : Only personal jobs
	if user.HasRole(schema.RoleUser) {
		return ownJobsOnly(), nil
	}

	// No known Role, return error
	return sq.SelectBuilder{}, fmt.Errorf("user has no or unknown roles")
}
// SecurityCheck looks up the user stored in ctx and delegates to
// SecurityCheckWithUser to restrict query to the jobs that user may access.
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
	return SecurityCheckWithUser(GetUserFromContext(ctx), query)
}
// BuildWhereClause extends query with one condition per field that is set
// on filter and returns the extended builder.
//
// Fields that are nil are ignored. A nil filter adds no conditions at all
// and returns query unchanged (previously this dereferenced a nil pointer).
// Conditions are appended in a fixed order, so the generated SQL is stable
// for a given filter.
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
	if filter == nil {
		return query
	}

	if filter.Tags != nil {
		// OR logic: returns all distinct jobs carrying at least one of the
		// requested tags. TODO: AND-logic query?
		query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
	}
	if filter.JobID != nil {
		query = buildStringCondition("job.job_id", filter.JobID, query)
	}
	if filter.ArrayJobID != nil {
		query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
	}
	if filter.User != nil {
		query = buildStringCondition("job.hpc_user", filter.User, query)
	}
	if filter.Project != nil {
		query = buildStringCondition("job.project", filter.Project, query)
	}
	if filter.JobName != nil {
		query = buildMetaJsonCondition("jobName", filter.JobName, query)
	}
	if filter.Cluster != nil {
		query = buildStringCondition("job.cluster", filter.Cluster, query)
	}
	if filter.Partition != nil {
		query = buildStringCondition("job.cluster_partition", filter.Partition, query)
	}
	if filter.StartTime != nil {
		query = buildTimeCondition("job.start_time", filter.StartTime, query)
	}
	if filter.Duration != nil {
		query = buildIntCondition("job.duration", filter.Duration, query)
	}
	if filter.MinRunningFor != nil {
		// The current unix timestamp is computed here and bound as a
		// parameter because there does not seem to be a portable way to
		// obtain it across different databases.
		now := time.Now().Unix()
		query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
	}
	if filter.Exclusive != nil {
		query = query.Where("job.exclusive = ?", *filter.Exclusive)
	}
	if filter.State != nil {
		// Convert the typed states into plain strings for the IN clause.
		states := make([]string, len(filter.State))
		for i, val := range filter.State {
			states[i] = string(val)
		}
		query = query.Where(sq.Eq{"job.job_state": states})
	}
	if filter.NumNodes != nil {
		query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
	}
	if filter.NumAccelerators != nil {
		query = buildIntCondition("job.num_acc", filter.NumAccelerators, query)
	}
	if filter.NumHWThreads != nil {
		query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query)
	}
	if filter.Node != nil {
		query = buildResourceJsonCondition("hostname", filter.Node, query)
	}
	if filter.Energy != nil {
		query = buildFloatCondition("job.energy", filter.Energy, query)
	}
	// Ranging over a nil slice is a no-op, so no explicit nil check is needed.
	for _, ms := range filter.MetricStats {
		query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
	}
	return query
}
// buildIntCondition adds "<field> BETWEEN ? AND ?" to query, binding the
// From and To values of cond as the two parameters.
func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
	lower, upper := cond.From, cond.To
	return query.Where(fmt.Sprintf("%s BETWEEN ? AND ?", field), lower, upper)
}
// buildFloatCondition adds "<field> BETWEEN ? AND ?" to query, binding the
// From and To values of cond as the two parameters.
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
	lower, upper := cond.From, cond.To
	return query.Where(fmt.Sprintf("%s BETWEEN ? AND ?", field), lower, upper)
}
// buildTimeCondition restricts field (compared as a unix timestamp)
// according to cond.
//
// Explicit bounds take precedence: both From and To give a BETWEEN, a single
// bound gives a one-sided comparison. If neither bound is set, a named range
// ("last6h", "last24h", "last7d", "last30d") is translated into a window
// ending at the current time. An empty or unknown range leaves query
// unchanged; an unknown one is additionally logged at debug level.
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
	switch {
	case cond.From != nil && cond.To != nil:
		return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
	case cond.From != nil:
		return query.Where("? <= "+field, cond.From.Unix())
	case cond.To != nil:
		return query.Where(field+" <= ?", cond.To.Unix())
	case cond.Range == "":
		return query
	}

	// Named range: determine the window length in seconds.
	const hour int64 = 60 * 60
	var window int64
	switch cond.Range {
	case "last6h":
		window = 6 * hour
	case "last24h":
		window = 24 * hour
	case "last7d":
		window = 24 * 7 * hour
	case "last30d":
		window = 24 * 30 * hour
	default:
		log.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
		return query
	}

	end := time.Now().Unix()
	return query.Where(field+" BETWEEN ? AND ?", end-window, end)
}
// buildFloatJsonCondition restricts query to jobs whose footprint JSON holds
// a value for condName that lies between condRange.From and condRange.To.
//
// Rows with an invalid footprint JSON are excluded first. The JSON path is
// passed as a bound parameter instead of being concatenated into the SQL
// text: condName originates from the caller-supplied filter, so it must not
// be able to alter the statement.
func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
	// Verify and search only in valid JSON documents.
	query = query.Where("JSON_VALID(footprint)")

	jsonPath := "$." + condName
	return query.Where("JSON_EXTRACT(footprint, ?) BETWEEN ? AND ?", jsonPath, condRange.From, condRange.To)
}
// buildStringCondition adds a single string comparison on field to query.
//
// Exactly one operator is applied: the first non-nil member of cond in the
// order Eq, Neq, StartsWith, EndsWith, Contains, In. If none is set, query is
// returned unchanged. The LIKE based operators wrap the value with '%'
// wildcards; the value itself is bound as a parameter.
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
	likeExpr := field + " LIKE ?"

	switch {
	case cond.Eq != nil:
		return query.Where(field+" = ?", *cond.Eq)
	case cond.Neq != nil:
		return query.Where(field+" != ?", *cond.Neq)
	case cond.StartsWith != nil:
		return query.Where(likeExpr, fmt.Sprint(*cond.StartsWith, "%"))
	case cond.EndsWith != nil:
		return query.Where(likeExpr, fmt.Sprint("%", *cond.EndsWith))
	case cond.Contains != nil:
		return query.Where(likeExpr, fmt.Sprint("%", *cond.Contains, "%"))
	case cond.In != nil:
		// Work on a private, non-nil copy of the requested values.
		values := make([]string, len(cond.In))
		copy(values, cond.In)
		return query.Where(sq.Or{sq.Eq{field: values}})
	default:
		return query
	}
}
// buildMetaJsonCondition adds a string comparison on the value stored under
// jsonField inside the meta_data JSON column.
//
// A JSON_VALID(meta_data) condition is always added, even when cond carries
// no operator. After that, the first non-nil member of cond in the order Eq,
// Neq, StartsWith, EndsWith, Contains is applied.
func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
	// Verify and search only in valid JSON documents.
	query = query.Where("JSON_VALID(meta_data)")

	// SQL expression that extracts the requested field.
	extract := "JSON_EXTRACT(meta_data, \"$." + jsonField + "\")"

	switch {
	case cond.Eq != nil:
		return query.Where(extract+" = ?", *cond.Eq)
	case cond.Neq != nil:
		return query.Where(extract+" != ?", *cond.Neq)
	case cond.StartsWith != nil:
		return query.Where(extract+" LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
	case cond.EndsWith != nil:
		return query.Where(extract+" LIKE ?", fmt.Sprint("%", *cond.EndsWith))
	case cond.Contains != nil:
		return query.Where(extract+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
	default:
		return query
	}
}
// buildResourceJsonCondition restricts query to jobs for which at least one
// element of the job.resources JSON array has a jsonField value matching
// cond.
//
// A JSON_VALID(resources) condition is always added. After that, the first
// non-nil member of cond in the order Eq, Neq, StartsWith, EndsWith,
// Contains is applied as an EXISTS sub-select over json_each(job.resources).
//
// The StartsWith branch previously contained an extra closing parenthesis
// after json_extract(...), which produced unbalanced SQL; all branches now
// share one prefix so they cannot drift apart again.
func buildResourceJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
	// Verify and search only in valid JSON documents.
	query = query.Where("JSON_VALID(resources)")

	// Common head of the sub-select; each branch appends its operator, the
	// placeholder and the parenthesis closing the EXISTS.
	match := "EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$." + jsonField + "\")"

	switch {
	case cond.Eq != nil:
		return query.Where(match+" = ?)", *cond.Eq)
	case cond.Neq != nil: // Currently unused
		return query.Where(match+" != ?)", *cond.Neq)
	case cond.StartsWith != nil: // Currently unused
		return query.Where(match+" LIKE ?)", fmt.Sprint(*cond.StartsWith, "%"))
	case cond.EndsWith != nil: // Currently unused
		return query.Where(match+" LIKE ?)", fmt.Sprint("%", *cond.EndsWith))
	case cond.Contains != nil:
		return query.Where(match+" LIKE ?)", fmt.Sprint("%", *cond.Contains, "%"))
	default:
		return query
	}
}
// matchFirstCap finds any character followed by an upper-case letter and at
// least one lower-case letter (the start of a capitalized word).
var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")

// matchAllCap finds a lower-case letter or digit directly followed by an
// upper-case letter.
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")

// toSnakeCase converts a camelCase identifier into snake_case, e.g.
// "startTime" becomes "start_time".
//
// Input containing a single quote or a backslash is rejected through
// log.Panic. Both characters are also stripped afterwards, before the
// conversion takes place.
func toSnakeCase(str string) string {
	// Both rejected characters are ASCII, so a byte-wise scan is sufficient.
	for i := 0; i < len(str); i++ {
		switch str[i] {
		case '\'', '\\':
			log.Panic("toSnakeCase() attack vector!")
		}
	}

	cleaned := strings.NewReplacer("'", "", "\\", "").Replace(str)

	// First separate capitalized words, then any remaining lower/upper pair.
	withWords := matchFirstCap.ReplaceAllString(cleaned, "${1}_${2}")
	separated := matchAllCap.ReplaceAllString(withWords, "${1}_${2}")
	return strings.ToLower(separated)
}

View File

@@ -1,15 +1,13 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"fmt"
"testing"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
_ "github.com/mattn/go-sqlite3"
)
@@ -32,7 +30,7 @@ func TestFind(t *testing.T) {
func TestFindById(t *testing.T) {
r := setup(t)
job, err := r.FindById(getContext(t), 5)
job, err := r.FindById(5)
if err != nil {
t.Fatal(err)
}
@@ -47,19 +45,7 @@ func TestFindById(t *testing.T) {
func TestGetTags(t *testing.T) {
r := setup(t)
const contextUserKey ContextKey = "user"
contextUserValue := &schema.User{
Username: "testuser",
Projects: make([]string, 0),
Roles: []string{"user"},
AuthType: 0,
AuthSource: 2,
}
ctx := context.WithValue(getContext(t), contextUserKey, contextUserValue)
// Test Tag has Scope "global"
tags, counts, err := r.CountTags(GetUserFromContext(ctx))
tags, counts, err := r.CountTags(nil)
if err != nil {
t.Fatal(err)
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -16,7 +16,7 @@ import (
"github.com/golang-migrate/migrate/v4/source/iofs"
)
const Version uint = 8
const Version uint = 6
//go:embed migrations/*
var migrationFiles embed.FS
@@ -54,10 +54,10 @@ func checkDBVersion(backend string, db *sql.DB) error {
return err
}
default:
log.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
log.Fatalf("unsupported database backend: %s", backend)
}
v, dirty, err := m.Version()
v, _, err := m.Version()
if err != nil {
if err == migrate.ErrNilVersion {
log.Warn("Legacy database without version or missing database file!")
@@ -68,18 +68,18 @@ func checkDBVersion(backend string, db *sql.DB) error {
if v < Version {
return fmt.Errorf("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
} else if v > Version {
return fmt.Errorf("unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool", v, Version)
}
if dirty {
return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
if v > Version {
return fmt.Errorf("unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool", v, Version)
}
return nil
}
func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err error) {
func MigrateDB(backend string, db string) error {
var m *migrate.Migrate
switch backend {
case "sqlite3":
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
@@ -89,37 +89,20 @@ func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err erro
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
if err != nil {
return m, err
return err
}
case "mysql":
d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil {
return m, err
return err
}
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
if err != nil {
return m, err
return err
}
default:
log.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
}
return m, nil
}
func MigrateDB(backend string, db string) error {
m, err := getMigrateInstance(backend, db)
if err != nil {
return err
}
v, dirty, err := m.Version()
log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
if dirty {
return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
log.Fatalf("unsupported database backend: %s", backend)
}
if err := m.Up(); err != nil {
@@ -133,35 +116,3 @@ func MigrateDB(backend string, db string) error {
m.Close()
return nil
}
// RevertDB migrates the database to schema version Version - 1.
//
// backend and db are handed to getMigrateInstance to create the migration
// handle. migrate.ErrNoChange is not treated as a failure; it is only logged.
// The handle is closed on every return path, including the error path,
// which previously left it open.
func RevertDB(backend string, db string) error {
	m, err := getMigrateInstance(backend, db)
	if err != nil {
		return err
	}
	// Release the source and database handles no matter how we return.
	defer m.Close()

	if err := m.Migrate(Version - 1); err != nil {
		if err == migrate.ErrNoChange {
			log.Info("DB already up to date!")
		} else {
			return err
		}
	}

	return nil
}
// ForceDB sets the recorded schema version of the database to Version via
// the migrate handle's Force method.
//
// backend and db are handed to getMigrateInstance to create the migration
// handle. The handle is closed on every return path, including the error
// path, which previously left it open.
func ForceDB(backend string, db string) error {
	m, err := getMigrateInstance(backend, db)
	if err != nil {
		return err
	}
	// Release the source and database handles no matter how we return.
	defer m.Close()

	if err := m.Force(int(Version)); err != nil {
		return err
	}

	return nil
}

View File

@@ -1,3 +0,0 @@
-- Redefine tag.id as a plain INTEGER column (no AUTO_INCREMENT attribute).
-- Foreign key checks are switched off for the duration of the ALTER and
-- switched back on afterwards.
-- NOTE(review): presumably needed because other tables reference tag.id;
-- confirm against the schema.
SET FOREIGN_KEY_CHECKS = 0;
ALTER TABLE tag MODIFY id INTEGER;
SET FOREIGN_KEY_CHECKS = 1;

View File

@@ -1,3 +0,0 @@
-- Redefine tag.id as an INTEGER column with AUTO_INCREMENT.
-- Foreign key checks are switched off for the duration of the ALTER and
-- switched back on afterwards.
-- NOTE(review): presumably needed because other tables reference tag.id;
-- confirm against the schema.
SET FOREIGN_KEY_CHECKS = 0;
ALTER TABLE tag MODIFY id INTEGER AUTO_INCREMENT;
SET FOREIGN_KEY_CHECKS = 1;

View File

@@ -1,83 +0,0 @@
-- Down migration (MySQL/MariaDB): replace the footprint JSON column by the
-- individual metric columns again, undo the table/column renames and remove
-- the indices created by the matching up migration.

-- Remove the columns added by the up migration.
ALTER TABLE job DROP energy;
ALTER TABLE job DROP energy_footprint;
-- Without this the up migration cannot be applied again (duplicate column).
ALTER TABLE tag DROP tag_scope;

-- Re-create the per-metric columns. A column type is mandatory here.
-- NOTE(review): type and default mirror `energy REAL NOT NULL DEFAULT 0.0`
-- from the up migration; confirm against the original job table definition.
ALTER TABLE job ADD COLUMN flops_any_avg REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN mem_bw_avg REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN mem_used_max REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN load_avg REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN net_bw_avg REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN net_data_vol_total REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN file_bw_avg REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN file_data_vol_total REAL NOT NULL DEFAULT 0.0;

-- Copy the values back out of the footprint JSON. The up migration only
-- stores the net/file metrics when they are non-zero, so a missing key is
-- mapped back to 0.0.
UPDATE job SET flops_any_avg = COALESCE(json_extract(footprint, '$.flops_any_avg'), 0.0);
UPDATE job SET mem_bw_avg = COALESCE(json_extract(footprint, '$.mem_bw_avg'), 0.0);
UPDATE job SET mem_used_max = COALESCE(json_extract(footprint, '$.mem_used_max'), 0.0);
UPDATE job SET load_avg = COALESCE(json_extract(footprint, '$.cpu_load_avg'), 0.0);
UPDATE job SET net_bw_avg = COALESCE(json_extract(footprint, '$.net_bw_avg'), 0.0);
UPDATE job SET net_data_vol_total = COALESCE(json_extract(footprint, '$.net_data_vol_total'), 0.0);
UPDATE job SET file_bw_avg = COALESCE(json_extract(footprint, '$.file_bw_avg'), 0.0);
UPDATE job SET file_data_vol_total = COALESCE(json_extract(footprint, '$.file_data_vol_total'), 0.0);

ALTER TABLE job DROP footprint;

-- Restore the original table and column names
RENAME TABLE hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`;

-- Remove the indices created by the up migration. DROP INDEX needs the
-- owning table here, as in the up migration's own DROP INDEX statements.
DROP INDEX IF EXISTS jobs_cluster ON job;
DROP INDEX IF EXISTS jobs_cluster_user ON job;
DROP INDEX IF EXISTS jobs_cluster_project ON job;
DROP INDEX IF EXISTS jobs_cluster_subcluster ON job;
DROP INDEX IF EXISTS jobs_cluster_starttime ON job;
DROP INDEX IF EXISTS jobs_cluster_duration ON job;
DROP INDEX IF EXISTS jobs_cluster_numnodes ON job;

DROP INDEX IF EXISTS jobs_cluster_partition ON job;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime ON job;
DROP INDEX IF EXISTS jobs_cluster_partition_duration ON job;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes ON job;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate ON job;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user ON job;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project ON job;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime ON job;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration ON job;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes ON job;

DROP INDEX IF EXISTS jobs_cluster_jobstate ON job;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user ON job;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project ON job;
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime ON job;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration ON job;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes ON job;

DROP INDEX IF EXISTS jobs_user ON job;
DROP INDEX IF EXISTS jobs_user_starttime ON job;
DROP INDEX IF EXISTS jobs_user_duration ON job;
DROP INDEX IF EXISTS jobs_user_numnodes ON job;

DROP INDEX IF EXISTS jobs_project ON job;
DROP INDEX IF EXISTS jobs_project_user ON job;
DROP INDEX IF EXISTS jobs_project_starttime ON job;
DROP INDEX IF EXISTS jobs_project_duration ON job;
DROP INDEX IF EXISTS jobs_project_numnodes ON job;

DROP INDEX IF EXISTS jobs_jobstate ON job;
DROP INDEX IF EXISTS jobs_jobstate_user ON job;
DROP INDEX IF EXISTS jobs_jobstate_project ON job;
-- Created by the up migration but previously never dropped.
DROP INDEX IF EXISTS jobs_jobstate_cluster ON job;
DROP INDEX IF EXISTS jobs_jobstate_starttime ON job;
DROP INDEX IF EXISTS jobs_jobstate_duration ON job;
DROP INDEX IF EXISTS jobs_jobstate_numnodes ON job;

DROP INDEX IF EXISTS jobs_arrayjobid_starttime ON job;
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime ON job;

DROP INDEX IF EXISTS jobs_starttime ON job;
DROP INDEX IF EXISTS jobs_duration ON job;
DROP INDEX IF EXISTS jobs_numnodes ON job;

DROP INDEX IF EXISTS jobs_duration_starttime ON job;
DROP INDEX IF EXISTS jobs_numnodes_starttime ON job;
DROP INDEX IF EXISTS jobs_numacc_starttime ON job;
DROP INDEX IF EXISTS jobs_energy_starttime ON job;

View File

@@ -1,123 +0,0 @@
-- Up migration (MySQL/MariaDB): move the per-metric job columns into a
-- footprint JSON column, add energy and tag scope columns, rename
-- tables/columns that use reserved keywords and create the job indices.

-- Remove the indices of the previous schema.
DROP INDEX IF EXISTS job_stats ON job;
DROP INDEX IF EXISTS job_by_user ON job;
DROP INDEX IF EXISTS job_by_starttime ON job;
DROP INDEX IF EXISTS job_by_job_id ON job;
DROP INDEX IF EXISTS job_list ON job;
DROP INDEX IF EXISTS job_list_user ON job;
DROP INDEX IF EXISTS job_list_users ON job;
DROP INDEX IF EXISTS job_list_users_start ON job;
-- New columns: energy data, the footprint JSON and the tag scope.
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint JSON;
ALTER TABLE job ADD COLUMN footprint JSON;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
-- Do not use reserved keywords anymore
RENAME TABLE `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition;
-- Give the columns used in the indices below a bounded VARCHAR type.
ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50);
ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN project VARCHAR(50);
ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50);
ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25);
-- Build the footprint JSON from the old per-metric columns. The document is
-- seeded with flops_any_avg; the net/file metrics are only added when their
-- column value is non-zero.
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
-- The per-metric columns are now redundant.
ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-- Optimize DB index usage
-- NOTE(review): no statement follows the comment above in this script;
-- confirm whether an optimize/analyze step is missing here.

View File

@@ -1,103 +0,0 @@
-- Down migration (SQLite): replace the footprint JSON column by the
-- individual metric columns again, undo the table/column renames and remove
-- the indices created by the matching up migration.

-- The indices have to go first: SQLite refuses to drop a column that is
-- still referenced by an index (several of these indices cover `energy`).
DROP INDEX IF EXISTS jobs_cluster;
DROP INDEX IF EXISTS jobs_cluster_user;
DROP INDEX IF EXISTS jobs_cluster_project;
DROP INDEX IF EXISTS jobs_cluster_subcluster;
DROP INDEX IF EXISTS jobs_cluster_starttime;
DROP INDEX IF EXISTS jobs_cluster_duration;
DROP INDEX IF EXISTS jobs_cluster_numnodes;
DROP INDEX IF EXISTS jobs_cluster_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_numacc;
DROP INDEX IF EXISTS jobs_cluster_energy;

DROP INDEX IF EXISTS jobs_cluster_partition;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_energy;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_energy;

DROP INDEX IF EXISTS jobs_cluster_jobstate;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_jobstate_energy;

DROP INDEX IF EXISTS jobs_user;
DROP INDEX IF EXISTS jobs_user_starttime;
DROP INDEX IF EXISTS jobs_user_duration;
DROP INDEX IF EXISTS jobs_user_numnodes;
DROP INDEX IF EXISTS jobs_user_numhwthreads;
DROP INDEX IF EXISTS jobs_user_numacc;
DROP INDEX IF EXISTS jobs_user_energy;

DROP INDEX IF EXISTS jobs_project;
DROP INDEX IF EXISTS jobs_project_user;
DROP INDEX IF EXISTS jobs_project_starttime;
DROP INDEX IF EXISTS jobs_project_duration;
DROP INDEX IF EXISTS jobs_project_numnodes;
DROP INDEX IF EXISTS jobs_project_numhwthreads;
DROP INDEX IF EXISTS jobs_project_numacc;
DROP INDEX IF EXISTS jobs_project_energy;

DROP INDEX IF EXISTS jobs_jobstate;
DROP INDEX IF EXISTS jobs_jobstate_user;
DROP INDEX IF EXISTS jobs_jobstate_project;
DROP INDEX IF EXISTS jobs_jobstate_starttime;
DROP INDEX IF EXISTS jobs_jobstate_duration;
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_jobstate_numacc;

DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;

DROP INDEX IF EXISTS jobs_starttime;
DROP INDEX IF EXISTS jobs_duration;
DROP INDEX IF EXISTS jobs_numnodes;
DROP INDEX IF EXISTS jobs_numhwthreads;
DROP INDEX IF EXISTS jobs_numacc;
DROP INDEX IF EXISTS jobs_energy;

DROP INDEX IF EXISTS jobs_duration_starttime;
DROP INDEX IF EXISTS jobs_numnodes_starttime;
DROP INDEX IF EXISTS jobs_numhwthreads_starttime;
DROP INDEX IF EXISTS jobs_numacc_starttime;
DROP INDEX IF EXISTS jobs_energy_starttime;

-- Remove the columns added by the up migration.
ALTER TABLE job DROP energy;
ALTER TABLE job DROP energy_footprint;
-- Without this the up migration cannot be applied again (duplicate column).
ALTER TABLE tag DROP tag_scope;

-- Re-create the per-metric columns and fill them from the footprint JSON.
ALTER TABLE job ADD COLUMN flops_any_avg;
ALTER TABLE job ADD COLUMN mem_bw_avg;
ALTER TABLE job ADD COLUMN mem_used_max;
ALTER TABLE job ADD COLUMN load_avg;
ALTER TABLE job ADD COLUMN net_bw_avg;
ALTER TABLE job ADD COLUMN net_data_vol_total;
ALTER TABLE job ADD COLUMN file_bw_avg;
ALTER TABLE job ADD COLUMN file_data_vol_total;

UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');

ALTER TABLE job DROP footprint;

-- Restore the original table and column names (the up migration renames
-- them to hpc_user / cluster_partition).
ALTER TABLE hpc_user RENAME TO "user";
ALTER TABLE job RENAME COLUMN hpc_user TO "user";
ALTER TABLE job RENAME COLUMN cluster_partition TO "partition";

View File

@@ -1,142 +0,0 @@
-- Schema migration script. In order, it:
--   1. drops existing indices by name,
--   2. adds the energy/footprint columns to job and a scope column to tag,
--   3. renames identifiers that collide with reserved keywords,
--   4. copies the per-job statistics columns into the JSON footprint column
--      and drops the now redundant columns,
--   5. creates a new set of indices and runs PRAGMA optimize.
-- The statement order matters: the indices created at the end reference the
-- renamed columns (hpc_user, cluster_partition) and the new energy column.

-- Drop existing indices by name (IF EXISTS: no error when one is missing).
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_list;
DROP INDEX IF EXISTS job_list_user;
DROP INDEX IF EXISTS job_list_users;
DROP INDEX IF EXISTS job_list_users_start;
-- New columns on job: energy (REAL, existing rows get 0.0) plus
-- energy_footprint and footprint (TEXT, existing rows get NULL).
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL;
-- Existing tags receive the scope 'global' through the column default.
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
-- Do not use reserved keywords anymore
ALTER TABLE "user" RENAME TO hpc_user;
ALTER TABLE job RENAME COLUMN "user" TO hpc_user;
ALTER TABLE job RENAME COLUMN "partition" TO cluster_partition;
-- Seed footprint with a one-key JSON object for every job, then copy the
-- statistics columns into it. json_replace overwrites the seeded key;
-- json_insert adds keys that are not present yet.
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
-- Note: the load_avg column is stored under the key cpu_load_avg.
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
-- The network and file system values are only copied when they are non-zero.
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
-- Remove the statistics columns whose values were copied into footprint above.
ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);
-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, start_time);
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-- Optimize DB index usage
PRAGMA optimize;

View File

@@ -0,0 +1,253 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
// QueryJobs returns the jobs that match all given filters and that the user
// stored in ctx is allowed to see (see SecurityCheck).
//
// Parameters:
//   - filters: every filter is applied via BuildWhereClause (AND semantics).
//   - page:    optional pagination; ignored when nil or ItemsPerPage == -1.
//   - order:   optional sorting; Field is converted to snake_case and used as
//     a column of the job table.
//
// An error is returned when the security check fails, when the sort order or
// sort field is invalid, or when running/scanning the query fails.
func (r *JobRepository) QueryJobs(
	ctx context.Context,
	filters []*model.JobFilter,
	page *model.PageRequest,
	order *model.OrderByInput) ([]*schema.Job, error) {
	query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
	if qerr != nil {
		return nil, qerr
	}

	if order != nil {
		field := toSnakeCase(order.Field)

		// The column name is spliced into the SQL text (ORDER BY cannot take a
		// bind parameter), so only accept plain identifiers. toSnakeCase alone
		// merely rejects quotes and backslashes.
		if field == "" {
			return nil, fmt.Errorf("REPOSITORY/QUERY > invalid sorting field %q", order.Field)
		}
		for _, c := range field {
			if !((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') {
				return nil, fmt.Errorf("REPOSITORY/QUERY > invalid sorting field %q", order.Field)
			}
		}

		switch order.Order {
		case model.SortDirectionEnumAsc:
			query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
		case model.SortDirectionEnumDesc:
			query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
		default:
			return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
		}
	}

	if page != nil && page.ItemsPerPage != -1 {
		limit := uint64(page.ItemsPerPage)
		query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
	}

	for _, f := range filters {
		query = BuildWhereClause(f, query)
	}

	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Errorf("Error while running query: %v", err)
		return nil, err
	}
	// Release the result set on every return path, not only on scan errors.
	defer rows.Close()

	jobs := make([]*schema.Job, 0, 50)
	for rows.Next() {
		job, err := scanJob(rows)
		if err != nil {
			log.Warn("Error while scanning rows (Jobs)")
			return nil, err
		}
		jobs = append(jobs, job)
	}

	// rows.Next() also returns false when iteration failed; without this check
	// a truncated result would be returned as if it were complete.
	if err := rows.Err(); err != nil {
		log.Warn("Error while iterating rows (Jobs)")
		return nil, err
	}

	return jobs, nil
}
// CountJobs returns the number of jobs that match all given filters and that
// the user stored in ctx is allowed to see (see SecurityCheck).
func (r *JobRepository) CountJobs(
	ctx context.Context,
	filters []*model.JobFilter) (int, error) {
	countQuery, err := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
	if err != nil {
		return 0, err
	}

	// Narrow the count down with every requested filter.
	for idx := range filters {
		countQuery = BuildWhereClause(filters[idx], countQuery)
	}

	total := 0
	err = countQuery.RunWith(r.DB).Scan(&total)
	if err != nil {
		return 0, err
	}
	return total, nil
}
// SecurityCheck restricts query to the jobs the user found in ctx may see:
//
//   - admin, support and API roles: no restriction
//   - manager role: jobs of the managed projects plus the manager's own jobs
//     (own jobs only when no projects are configured)
//   - user role, or no known role at all: own jobs only
//
// An error is returned when ctx carries no user.
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
	user := GetUserFromContext(ctx)
	if user == nil {
		return sq.SelectBuilder{}, fmt.Errorf("user context is nil")
	}

	switch {
	case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleApi}):
		// Admin & Co. : All jobs
		return query, nil

	case user.HasRole(schema.RoleManager):
		// Manager : Add filter for managed projects' jobs only + personal jobs
		if len(user.Projects) == 0 {
			log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
			return query.Where("job.user = ?", user.Username), nil
		}
		visible := sq.Or{
			sq.Eq{"job.project": user.Projects},
			sq.Eq{"job.user": user.Username},
		}
		return query.Where(visible), nil

	case user.HasRole(schema.RoleUser):
		// User : Only personal jobs
		return query.Where("job.user = ?", user.Username), nil

	default:
		// Shortterm compatibility: Return User-Query if no roles:
		return query.Where("job.user = ?", user.Username), nil
		// // On the longterm: Return Error instead of fallback:
		// var qnil sq.SelectBuilder
		// return qnil, fmt.Errorf("user '%s' with unknown roles [%#v]", user.Username, user.Roles)
	}
}
// BuildWhereClause extends query with the conditions described by a single
// model.JobFilter and returns the extended builder. Only the filter fields
// that are set (non-nil) contribute a condition; a tag filter additionally
// joins the jobtag table.
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
	// Tags are stored in a separate mapping table.
	if filter.Tags != nil {
		query = query.
			Join("jobtag ON jobtag.job_id = job.id").
			Where(sq.Eq{"jobtag.tag_id": filter.Tags})
	}

	if filter.JobID != nil {
		query = buildStringCondition("job.job_id", filter.JobID, query)
	}
	if filter.ArrayJobID != nil {
		query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
	}

	// String filters, in the order in which their clauses are emitted.
	for _, sf := range []struct {
		column string
		cond   *model.StringInput
	}{
		{"job.user", filter.User},
		{"job.project", filter.Project},
		{"job.meta_data", filter.JobName},
		{"job.cluster", filter.Cluster},
		{"job.partition", filter.Partition},
	} {
		if sf.cond != nil {
			query = buildStringCondition(sf.column, sf.cond, query)
		}
	}

	if filter.StartTime != nil {
		query = buildTimeCondition("job.start_time", filter.StartTime, query)
	}

	// There does not seem to be a portable way to get the current unix
	// timestamp across different DBs, so it is passed in as a bind parameter.
	if filter.Duration != nil {
		now := time.Now().Unix()
		query = query.Where("(CASE WHEN job.job_state = 'running' THEN (? - job.start_time) ELSE job.duration END) BETWEEN ? AND ?", now, filter.Duration.From, filter.Duration.To)
	}
	if filter.MinRunningFor != nil {
		now := time.Now().Unix()
		query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
	}

	if filter.State != nil {
		stateNames := make([]string, 0, len(filter.State))
		for _, st := range filter.State {
			stateNames = append(stateNames, string(st))
		}
		query = query.Where(sq.Eq{"job.job_state": stateNames})
	}

	// Integer range filters.
	for _, nf := range []struct {
		column string
		cond   *schema.IntRange
	}{
		{"job.num_nodes", filter.NumNodes},
		{"job.num_acc", filter.NumAccelerators},
		{"job.num_hwthreads", filter.NumHWThreads},
	} {
		if nf.cond != nil {
			query = buildIntCondition(nf.column, nf.cond, query)
		}
	}

	if filter.Node != nil {
		query = buildStringCondition("job.resources", filter.Node, query)
	}

	// Float range filters on the per-job statistics columns.
	for _, ff := range []struct {
		column string
		cond   *model.FloatRange
	}{
		{"job.flops_any_avg", filter.FlopsAnyAvg},
		{"job.mem_bw_avg", filter.MemBwAvg},
		{"job.load_avg", filter.LoadAvg},
		{"job.mem_used_max", filter.MemUsedMax},
	} {
		if ff.cond != nil {
			query = buildFloatCondition(ff.column, ff.cond, query)
		}
	}

	return query
}
// buildIntCondition adds "<field> BETWEEN From AND To" to query, with both
// bounds of cond passed as bind parameters.
func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
	clause := fmt.Sprintf("%s BETWEEN ? AND ?", field)
	return query.Where(clause, cond.From, cond.To)
}
// buildTimeCondition restricts field to the time range given by cond. The
// bounds are compared as unix timestamps. With both bounds set a BETWEEN
// clause is used, with only one bound a single comparison, and with no bound
// the query is returned unchanged.
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
	hasFrom, hasTo := cond.From != nil, cond.To != nil

	switch {
	case hasFrom && hasTo:
		return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
	case hasFrom:
		return query.Where("? <= "+field, cond.From.Unix())
	case hasTo:
		return query.Where(field+" <= ?", cond.To.Unix())
	}
	return query
}
// buildFloatCondition adds "<field> BETWEEN From AND To" to query, with both
// bounds of cond passed as bind parameters.
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
	clause := fmt.Sprintf("%s BETWEEN ? AND ?", field)
	return query.Where(clause, cond.From, cond.To)
}
// buildStringCondition adds a condition on field to query according to the
// first operator that is set in cond, checked in this order: Eq, Neq,
// StartsWith, EndsWith, Contains, In. The prefix/suffix/substring operators
// are expressed as LIKE patterns. When no operator is set the query is
// returned unchanged.
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
	switch {
	case cond.Eq != nil:
		return query.Where(field+" = ?", *cond.Eq)
	case cond.Neq != nil:
		return query.Where(field+" != ?", *cond.Neq)
	case cond.StartsWith != nil:
		pattern := fmt.Sprint(*cond.StartsWith, "%")
		return query.Where(field+" LIKE ?", pattern)
	case cond.EndsWith != nil:
		pattern := fmt.Sprint("%", *cond.EndsWith)
		return query.Where(field+" LIKE ?", pattern)
	case cond.Contains != nil:
		pattern := fmt.Sprint("%", *cond.Contains, "%")
		return query.Where(field+" LIKE ?", pattern)
	case cond.In != nil:
		// Hand the query builder its own copy of the candidate values.
		candidates := make([]string, len(cond.In))
		copy(candidates, cond.In)
		return query.Where(sq.Or{sq.Eq{field: candidates}})
	}
	return query
}
// Patterns used by toSnakeCase to find the positions where an underscore has
// to be inserted in a camelCase identifier.
var (
	matchFirstCap = regexp.MustCompile(`(.)([A-Z][a-z]+)`)
	matchAllCap   = regexp.MustCompile(`([a-z0-9])([A-Z])`)
)

// toSnakeCase converts a camelCase identifier (e.g. "startTime") into its
// snake_case form ("start_time"). Input containing a single quote or a
// backslash is reported through log.Panic, because the result is used to
// build SQL text.
func toSnakeCase(str string) string {
	for _, ch := range str {
		switch ch {
		case '\'', '\\':
			log.Panic("toSnakeCase() attack vector!")
		}
	}

	// Strip the offending characters as a second line of defence.
	cleaned := strings.NewReplacer("'", "", "\\", "").Replace(str)

	withFirstCaps := matchFirstCap.ReplaceAllString(cleaned, "${1}_${2}")
	withAllCaps := matchAllCap.ReplaceAllString(withFirstCaps, "${1}_${2}")
	return strings.ToLower(withAllCaps)
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -55,7 +55,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, err := db.FindById(getContext(b), jobId)
_, err := db.FindById(jobId)
noErr(b, err)
}
})
@@ -111,7 +111,7 @@ func BenchmarkDB_QueryJobs(b *testing.B) {
user := "mppi133h"
filter.User = &model.StringInput{Eq: &user}
page := &model.PageRequest{ItemsPerPage: 50, Page: 1}
order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc}
b.Run("QueryJobs", func(b *testing.B) {
db := setup(b)

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -8,11 +8,12 @@ import (
"context"
"database/sql"
"fmt"
"math"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -21,7 +22,7 @@ import (
// GraphQL validation should make sure that no unknown values can be specified.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.hpc_user",
model.AggregateUser: "job.user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
}
@@ -40,8 +41,8 @@ var sortBy2column = map[model.SortByAggregate]string{
func (r *JobRepository) buildCountQuery(
filter []*model.JobFilter,
kind string,
col string,
) sq.SelectBuilder {
col string) sq.SelectBuilder {
var query sq.SelectBuilder
if col != "" {
@@ -68,16 +69,16 @@ func (r *JobRepository) buildCountQuery(
func (r *JobRepository) buildStatsQuery(
filter []*model.JobFilter,
col string,
) sq.SelectBuilder {
col string) sq.SelectBuilder {
var query sq.SelectBuilder
castType := r.getCastType()
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
if col != "" {
// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
// Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
@@ -85,9 +86,10 @@ func (r *JobRepository) buildStatsQuery(
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
).From("job").GroupBy(col)
} else {
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
// Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select("COUNT(job.id)",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
@@ -106,15 +108,15 @@ func (r *JobRepository) buildStatsQuery(
return query
}
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
// user := GetUserFromContext(ctx)
// name, _ := r.FindColumnValue(user, id, "hpc_user", "name", "username", false)
// if name != "" {
// return name
// } else {
// return "-"
// }
// }
func (r *JobRepository) getUserName(ctx context.Context, id string) string {
user := GetUserFromContext(ctx)
name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
if name != "" {
return name
} else {
return "-"
}
}
func (r *JobRepository) getCastType() string {
var castType string
@@ -136,8 +138,8 @@ func (r *JobRepository) JobsStatsGrouped(
filter []*model.JobFilter,
page *model.PageRequest,
sortBy *model.SortByAggregate,
groupBy *model.Aggregate,
) ([]*model.JobsStatistics, error) {
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
start := time.Now()
col := groupBy2column[*groupBy]
query := r.buildStatsQuery(filter, col)
@@ -166,20 +168,14 @@ func (r *JobRepository) JobsStatsGrouped(
for rows.Next() {
var id sql.NullString
var name sql.NullString
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
if err := rows.Scan(&id, &jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
var personName string
if name.Valid {
personName = name.String
}
if jobs.Valid {
totalJobs = int(jobs.Int64)
@@ -209,12 +205,12 @@ func (r *JobRepository) JobsStatsGrouped(
totalAccHours = int(accHours.Int64)
}
if col == "job.hpc_user" {
// name := r.getUserName(ctx, id.String)
if col == "job.user" {
name := r.getUserName(ctx, id.String)
stats = append(stats,
&model.JobsStatistics{
ID: id.String,
Name: personName,
Name: name,
TotalJobs: totalJobs,
TotalWalltime: totalWalltime,
TotalNodes: totalNodes,
@@ -222,8 +218,7 @@ func (r *JobRepository) JobsStatsGrouped(
TotalCores: totalCores,
TotalCoreHours: totalCoreHours,
TotalAccs: totalAccs,
TotalAccHours: totalAccHours,
})
TotalAccHours: totalAccHours})
} else {
stats = append(stats,
&model.JobsStatistics{
@@ -235,8 +230,7 @@ func (r *JobRepository) JobsStatsGrouped(
TotalCores: totalCores,
TotalCoreHours: totalCoreHours,
TotalAccs: totalAccs,
TotalAccHours: totalAccHours,
})
TotalAccHours: totalAccHours})
}
}
}
@@ -247,8 +241,8 @@ func (r *JobRepository) JobsStatsGrouped(
func (r *JobRepository) JobsStats(
ctx context.Context,
filter []*model.JobFilter,
) ([]*model.JobsStatistics, error) {
filter []*model.JobFilter) ([]*model.JobsStatistics, error) {
start := time.Now()
query := r.buildStatsQuery(filter, "")
query, err := SecurityCheck(ctx, query)
@@ -283,36 +277,18 @@ func (r *JobRepository) JobsStats(
TotalWalltime: int(walltime.Int64),
TotalNodeHours: totalNodeHours,
TotalCoreHours: totalCoreHours,
TotalAccHours: totalAccHours,
})
TotalAccHours: totalAccHours})
}
log.Debugf("Timer JobStats %s", time.Since(start))
return stats, nil
}
func LoadJobStat(job *schema.JobMeta, metric string, statType string) float64 {
if stats, ok := job.Statistics[metric]; ok {
switch statType {
case "avg":
return stats.Avg
case "max":
return stats.Max
case "min":
return stats.Min
default:
log.Errorf("Unknown stat type %s", statType)
}
}
return 0.0
}
func (r *JobRepository) JobCountGrouped(
ctx context.Context,
filter []*model.JobFilter,
groupBy *model.Aggregate,
) ([]*model.JobsStatistics, error) {
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
start := time.Now()
col := groupBy2column[*groupBy]
query := r.buildCountQuery(filter, "", col)
@@ -339,8 +315,7 @@ func (r *JobRepository) JobCountGrouped(
stats = append(stats,
&model.JobsStatistics{
ID: id.String,
TotalJobs: int(cnt.Int64),
})
TotalJobs: int(cnt.Int64)})
}
}
@@ -353,8 +328,8 @@ func (r *JobRepository) AddJobCountGrouped(
filter []*model.JobFilter,
groupBy *model.Aggregate,
stats []*model.JobsStatistics,
kind string,
) ([]*model.JobsStatistics, error) {
kind string) ([]*model.JobsStatistics, error) {
start := time.Now()
col := groupBy2column[*groupBy]
query := r.buildCountQuery(filter, kind, col)
@@ -401,8 +376,8 @@ func (r *JobRepository) AddJobCount(
ctx context.Context,
filter []*model.JobFilter,
stats []*model.JobsStatistics,
kind string,
) ([]*model.JobsStatistics, error) {
kind string) ([]*model.JobsStatistics, error) {
start := time.Now()
query := r.buildCountQuery(filter, kind, "")
query, err := SecurityCheck(ctx, query)
@@ -445,41 +420,15 @@ func (r *JobRepository) AddJobCount(
func (r *JobRepository) AddHistograms(
ctx context.Context,
filter []*model.JobFilter,
stat *model.JobsStatistics,
durationBins *string,
) (*model.JobsStatistics, error) {
stat *model.JobsStatistics) (*model.JobsStatistics, error) {
start := time.Now()
var targetBinCount int
var targetBinSize int
switch {
case *durationBins == "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
targetBinCount = 60
targetBinSize = 60
case *durationBins == "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
targetBinCount = 72
targetBinSize = 600
case *durationBins == "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
targetBinCount = 48
targetBinSize = 3600
case *durationBins == "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
targetBinCount = 12
targetBinSize = 21600
case *durationBins == "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
targetBinCount = 14
targetBinSize = 43200
default: // 24h
targetBinCount = 24
targetBinSize = 3600
}
castType := r.getCastType()
var err error
// Return X-Values always as seconds, will be formatted into minutes and hours in frontend
value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), targetBinSize, castType)
stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount)
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter)
if err != nil {
log.Warn("Error while loading job statistics histogram: job duration")
log.Warn("Error while loading job statistics histogram: running jobs")
return nil, err
}
@@ -510,16 +459,14 @@ func (r *JobRepository) AddMetricHistograms(
ctx context.Context,
filter []*model.JobFilter,
metrics []string,
stat *model.JobsStatistics,
targetBinCount *int,
) (*model.JobsStatistics, error) {
stat *model.JobsStatistics) (*model.JobsStatistics, error) {
start := time.Now()
// Running Jobs Only: First query jobdata from sqlite, then query data and make bins
for _, f := range filter {
if f.State != nil {
if len(f.State) == 1 && f.State[0] == "running" {
stat.HistMetrics = r.runningJobsMetricStatisticsHistogram(ctx, metrics, filter, targetBinCount)
stat.HistMetrics = r.runningJobsMetricStatisticsHistogram(ctx, metrics, filter)
log.Debugf("Timer AddMetricHistograms %s", time.Since(start))
return stat, nil
}
@@ -528,7 +475,7 @@ func (r *JobRepository) AddMetricHistograms(
// All other cases: Query and make bins in sqlite directly
for _, m := range metrics {
metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter, targetBinCount)
metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter)
if err != nil {
log.Warnf("Error while loading job metric statistics histogram: %s", m)
continue
@@ -544,8 +491,8 @@ func (r *JobRepository) AddMetricHistograms(
func (r *JobRepository) jobsStatisticsHistogram(
ctx context.Context,
value string,
filters []*model.JobFilter,
) ([]*model.HistoPoint, error) {
filters []*model.JobFilter) ([]*model.HistoPoint, error) {
start := time.Now()
query, qerr := SecurityCheck(ctx,
sq.Select(value, "COUNT(job.id) AS count").From("job"))
@@ -565,7 +512,6 @@ func (r *JobRepository) jobsStatisticsHistogram(
}
points := make([]*model.HistoPoint, 0)
// is it possible to introduce zero values here? requires info about bincount
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
@@ -579,79 +525,39 @@ func (r *JobRepository) jobsStatisticsHistogram(
return points, nil
}
func (r *JobRepository) jobsDurationStatisticsHistogram(
ctx context.Context,
value string,
filters []*model.JobFilter,
binSizeSeconds int,
targetBinCount *int,
) ([]*model.HistoPoint, error) {
start := time.Now()
query, qerr := SecurityCheck(ctx,
sq.Select(value, "COUNT(job.id) AS count").From("job"))
if qerr != nil {
return nil, qerr
}
// Setup Array
points := make([]*model.HistoPoint, 0)
for i := 1; i <= *targetBinCount; i++ {
point := model.HistoPoint{Value: i * binSizeSeconds, Count: 0}
points = append(points, &point)
}
for _, f := range filters {
query = BuildWhereClause(f, query)
}
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
// Fill Array at matching $Value
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
for _, e := range points {
if e.Value == (point.Value * binSizeSeconds) {
// Note:
// Matching on unmodified integer value (and multiplying point.Value by binSizeSeconds after match)
// causes frontend to loop into highest targetBinCount, due to zoom condition instantly being fullfilled (cause unknown)
e.Count = point.Count
break
}
}
}
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
return points, nil
}
func (r *JobRepository) jobsMetricStatisticsHistogram(
ctx context.Context,
metric string,
filters []*model.JobFilter,
bins *int,
) (*model.MetricHistoPoints, error) {
filters []*model.JobFilter) (*model.MetricHistoPoints, error) {
var dbMetric string
switch metric {
case "cpu_load":
dbMetric = "load_avg"
case "flops_any":
dbMetric = "flops_any_avg"
case "mem_bw":
dbMetric = "mem_bw_avg"
case "mem_used":
dbMetric = "mem_used_max"
case "net_bw":
dbMetric = "net_bw_avg"
case "file_bw":
dbMetric = "file_bw_avg"
default:
return nil, fmt.Errorf("%s not implemented", metric)
}
// Get specific Peak or largest Peak
var metricConfig *schema.MetricConfig
var peak float64
var unit string
var footprintStat string
var peak float64 = 0.0
var unit string = ""
for _, f := range filters {
if f.Cluster != nil {
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
peak = metricConfig.Peak
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
footprintStat = metricConfig.Footprint
log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
}
}
@@ -666,40 +572,58 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
if unit == "" {
unit = m.Unit.Prefix + m.Unit.Base
}
if footprintStat == "" {
footprintStat = m.Footprint
}
}
}
}
}
// log.Debugf("Metric %s, Peak %f, Unit %s", metric, peak, unit)
// Make bins, see https://jereze.com/code/sql-histogram/ (Modified here)
// log.Debugf("Metric %s: DB %s, Peak %f, Unit %s", metric, dbMetric, peak, unit)
// Make bins, see https://jereze.com/code/sql-histogram/
start := time.Now()
// Find Jobs' Value Bin Number: Divide Value by Peak, Multiply by RequestedBins, then CAST to INT: Gets Bin-Number of Job
binQuery := fmt.Sprintf(`CAST(
((case when json_extract(footprint, "$.%s") = %f then %f*0.999999999 else json_extract(footprint, "$.%s") end) / %f)
* %v as INTEGER )`,
(metric + "_" + footprintStat), peak, peak, (metric + "_" + footprintStat), peak, *bins)
crossJoinQuery := sq.Select(
fmt.Sprintf(`max(%s) as max`, dbMetric),
fmt.Sprintf(`min(%s) as min`, dbMetric),
).From("job").Where(
fmt.Sprintf(`%s is not null`, dbMetric),
).Where(
fmt.Sprintf(`%s <= %f`, dbMetric, peak),
)
crossJoinQuery, cjqerr := SecurityCheck(ctx, crossJoinQuery)
if cjqerr != nil {
return nil, cjqerr
}
for _, f := range filters {
crossJoinQuery = BuildWhereClause(f, crossJoinQuery)
}
crossJoinQuerySql, crossJoinQueryArgs, sqlerr := crossJoinQuery.ToSql()
if sqlerr != nil {
return nil, sqlerr
}
bins := 10
binQuery := fmt.Sprintf(`CAST( (case when job.%s = value.max then value.max*0.999999999 else job.%s end - value.min) / (value.max - value.min) * %d as INTEGER )`, dbMetric, dbMetric, bins)
mainQuery := sq.Select(
fmt.Sprintf(`%s + 1 as bin`, binQuery),
fmt.Sprintf(`count(*) as count`),
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * %s as min`, peak, *bins, binQuery),
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * (%s + 1) as max`, peak, *bins, binQuery),
).From("job").Where(
"JSON_VALID(footprint)",
).Where(fmt.Sprintf(`json_extract(footprint, "$.%s") is not null and json_extract(footprint, "$.%s") <= %f`, (metric + "_" + footprintStat), (metric + "_" + footprintStat), peak))
fmt.Sprintf(`count(job.%s) as count`, dbMetric),
fmt.Sprintf(`CAST(((value.max / %d) * (%s )) as INTEGER ) as min`, bins, binQuery),
fmt.Sprintf(`CAST(((value.max / %d) * (%s + 1 )) as INTEGER ) as max`, bins, binQuery),
).From("job").CrossJoin(
fmt.Sprintf(`(%s) as value`, crossJoinQuerySql), crossJoinQueryArgs...,
).Where(fmt.Sprintf(`job.%s is not null and job.%s <= %f`, dbMetric, dbMetric, peak))
// Only accessible Jobs...
mainQuery, qerr := SecurityCheck(ctx, mainQuery)
if qerr != nil {
return nil, qerr
}
// Filters...
for _, f := range filters {
mainQuery = BuildWhereClause(f, mainQuery)
}
@@ -713,41 +637,18 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
return nil, err
}
// Setup Return Array With Bin-Numbers for Match and Min/Max based on Peak
points := make([]*model.MetricHistoPoint, 0)
binStep := int(peak) / *bins
for i := 1; i <= *bins; i++ {
binMin := (binStep * (i - 1))
binMax := (binStep * i)
epoint := model.MetricHistoPoint{Bin: &i, Count: 0, Min: &binMin, Max: &binMax}
points = append(points, &epoint)
}
for rows.Next() { // Fill Count if Bin-No. Matches (Not every Bin exists in DB!)
rpoint := model.MetricHistoPoint{}
if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil { // Required for Debug: &rpoint.Min, &rpoint.Max
for rows.Next() {
point := model.MetricHistoPoint{}
if err := rows.Scan(&point.Bin, &point.Count, &point.Min, &point.Max); err != nil {
log.Warnf("Error while scanning rows for %s", metric)
return nil, err // FIXME: Totally bricks cc-backend if returned and if all metrics requested?
return nil, err // Totally bricks cc-backend if returned and if all metrics requested?
}
for _, e := range points {
if e.Bin != nil && rpoint.Bin != nil {
if *e.Bin == *rpoint.Bin {
e.Count = rpoint.Count
// Only Required For Debug: Check DB returned Min/Max against Backend Init above
// if rpoint.Min != nil {
// log.Warnf(">>>> Bin %d Min Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Min, *e.Min)
// }
// if rpoint.Max != nil {
// log.Warnf(">>>> Bin %d Max Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Max, *e.Max)
// }
break
}
}
}
points = append(points, &point)
}
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Stat: &footprintStat, Data: points}
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Data: points}
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
return &result, nil
@@ -756,9 +657,7 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
ctx context.Context,
metrics []string,
filters []*model.JobFilter,
bins *int,
) []*model.MetricHistoPoints {
filters []*model.JobFilter) []*model.MetricHistoPoints {
// Get Jobs
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
@@ -782,7 +681,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
continue
}
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Errorf("Error while loading averages for histogram: %s", err)
return nil
}
@@ -793,14 +692,15 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
for idx, metric := range metrics {
// Get specific Peak or largest Peak
var metricConfig *schema.MetricConfig
var peak float64
var unit string
var peak float64 = 0.0
var unit string = ""
for _, f := range filters {
if f.Cluster != nil {
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
peak = metricConfig.Peak
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
}
}
@@ -820,24 +720,28 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
}
// Make and fill bins
peakBin := int(peak) / *bins
bins := 10.0
peakBin := peak / bins
points := make([]*model.MetricHistoPoint, 0)
for b := 0; b < *bins; b++ {
for b := 0; b < 10; b++ {
count := 0
bindex := b + 1
bmin := peakBin * b
bmax := peakBin * (b + 1)
bmin := math.Round(peakBin * float64(b))
bmax := math.Round(peakBin * (float64(b) + 1.0))
// Iterate AVG values for indexed metric and count for bins
for _, val := range avgs[idx] {
if int(val) >= bmin && int(val) < bmax {
if float64(val) >= bmin && float64(val) < bmax {
count += 1
}
}
bminint := int(bmin)
bmaxint := int(bmax)
// Append Bin to Metric Result Array
point := model.MetricHistoPoint{Bin: &bindex, Count: count, Min: &bmin, Max: &bmax}
point := model.MetricHistoPoint{Bin: &bindex, Count: count, Min: &bminint, Max: &bmaxint}
points = append(points, &point)
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

View File

@@ -1,11 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"fmt"
"strings"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -15,173 +14,53 @@ import (
)
// Add the tag with id `tagId` to the job with the database id `jobId`.
func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIdWithUser(user, job)
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
if _, err := r.stmtCache.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES ($1, $2)`, job, tag); err != nil {
log.Error("Error while running query")
return nil, err
}
j, err := r.FindById(job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
log.Errorf("Error adding tag with %s: %v", s, err)
return nil, err
}
tags, err := r.GetTags(user, &job)
tags, err := r.GetTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
return tags, archive.UpdateTags(j, archiveTags)
return tags, archive.UpdateTags(j, tags)
}
// Removes a tag from a job by tag id
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIdWithUser(user, job)
// Removes a tag from a job
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
if _, err := r.stmtCache.Exec("DELETE FROM jobtag WHERE jobtag.job_id = $1 AND jobtag.tag_id = $2", job, tag); err != nil {
log.Error("Error while running query")
return nil, err
}
j, err := r.FindById(job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
q := sq.Delete("jobtag").Where("jobtag.job_id = ?", job).Where("jobtag.tag_id = ?", tag)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
log.Errorf("Error removing tag with %s: %v", s, err)
return nil, err
}
tags, err := r.GetTags(user, &job)
tags, err := r.GetTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
return tags, archive.UpdateTags(j, archiveTags)
}
// RemoveJobTagByRequest detaches the tag identified by (tagType, tagName,
// tagScope) from the job with database id `job`.
//
// The tag id is resolved with TagId and the job is looked up with
// FindByIdWithUser before the matching jobtag row is deleted. On success it
// returns the tags that GetTags reports for the job and user afterwards; the
// accompanying error is the result of handing the tag list obtained from
// getArchiveTags to archive.UpdateTags.
func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagType string, tagName string, tagScope string) ([]*schema.Tag, error) {
	// Resolve the id of the tag that should be detached.
	tagID, found := r.TagId(tagType, tagName, tagScope)
	if !found {
		log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
		return nil, fmt.Errorf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
	}

	// Look the job up on behalf of the requesting user.
	jobMeta, err := r.FindByIdWithUser(user, job)
	if err != nil {
		log.Warn("Error while finding job by id")
		return nil, err
	}

	// Delete the job <-> tag link.
	del := sq.Delete("jobtag").
		Where("jobtag.job_id = ?", job).
		Where("jobtag.tag_id = ?", tagID)
	if _, execErr := del.RunWith(r.stmtCache).Exec(); execErr != nil {
		stmt, _, _ := del.ToSql()
		log.Errorf("Error removing tag from table 'jobTag' with %s: %v", stmt, execErr)
		return nil, execErr
	}

	// Tags returned to the caller.
	remaining, err := r.GetTags(user, &job)
	if err != nil {
		log.Warn("Error while getting tags for job")
		return nil, err
	}

	// Tags written back to the job archive.
	forArchive, err := r.getArchiveTags(&job)
	if err != nil {
		log.Warn("Error while getting tags for job")
		return nil, err
	}

	return remaining, archive.UpdateTags(jobMeta, forArchive)
}
// RemoveTagByRequest deletes the tag identified by (tagType, tagName, tagScope)
// from the database, together with every jobtag row that references it.
//
// It returns an error if TagId cannot find such a tag or if one of the delete
// statements fails.
func (r *JobRepository) RemoveTagByRequest(tagType string, tagName string, tagScope string) error {
	// Get Tag ID to delete
	tagID, exists := r.TagId(tagType, tagName, tagScope)
	if !exists {
		log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
		return fmt.Errorf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
	}

	// RemoveTagById runs the same two deletes (jobtag rows first, then the tag
	// row) with the same log messages, so delegate instead of keeping a second
	// copy of that logic here.
	return r.RemoveTagById(tagID)
}
// Removes a tag from db by tag id
func (r *JobRepository) RemoveTagById(tagID int64) error {
// Handle Delete JobTagTable
qJobTag := sq.Delete("jobtag").Where("jobtag.tag_id = ?", tagID)
if _, err := qJobTag.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := qJobTag.ToSql()
log.Errorf("Error removing tag from table 'jobTag' with %s: %v", s, err)
return err
}
// Handle Delete TagTable
qTag := sq.Delete("tag").Where("tag.id = ?", tagID)
if _, err := qTag.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := qTag.ToSql()
log.Errorf("Error removing tag from table 'tag' with %s: %v", s, err)
return err
}
return nil
return tags, archive.UpdateTags(j, tags)
}
// CreateTag creates a new tag with the specified type and name and returns its database id.
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
}
q := sq.Insert("tag").Columns("tag_type", "tag_name", "tag_scope").Values(tagType, tagName, tagScope)
res, err := q.RunWith(r.stmtCache).Exec()
func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64, err error) {
res, err := r.stmtCache.Exec("INSERT INTO tag (tag_type, tag_name) VALUES ($1, $2)", tagType, tagName)
if err != nil {
s, _, _ := q.ToSql()
log.Errorf("Error inserting tag with %s: %v", s, err)
return 0, err
}
@@ -189,9 +68,8 @@ func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope strin
}
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
// Fetch all Tags in DB for Display in Frontend Tag-View
tags = make([]schema.Tag, 0, 100)
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name FROM tag")
if err != nil {
return nil, nil, err
}
@@ -201,42 +79,22 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
if err = xrows.StructScan(&t); err != nil {
return nil, nil, err
}
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
readable, err := r.checkScopeAuth(user, "read", t.Scope)
if err != nil {
return nil, nil, err
}
if readable {
tags = append(tags, t)
}
tags = append(tags, t)
}
// Query and Count Jobs with attached Tags
q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
q := sq.Select("t.tag_name, count(jt.tag_id)").
From("tag t").
LeftJoin("jobtag jt ON t.id = jt.tag_id").
GroupBy("t.tag_name")
// Handle Scope Filtering
scopeList := "\"global\""
if user != nil {
scopeList += ",\"" + user.Username + "\""
}
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
scopeList += ",\"admin\""
}
q = q.Where("t.tag_scope IN (" + scopeList + ")")
// Handle Job Ownership
if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
// log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
// Unchanged: Needs to be own case still, due to UserRole/NoRole compatibility handling in else case
} else if user != nil && user.HasRole(schema.RoleManager) { // MANAGER: Count own jobs plus project's jobs
// Build ("project1", "project2", ...) list of variable length directly in SQL string
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
} else if user != nil { // USER OR NO ROLE (Compatibility): Only count own jobs
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ?)", user.Username)
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ?)", user.Username)
}
rows, err := q.RunWith(r.stmtCache).Query()
@@ -247,44 +105,29 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
counts = make(map[string]int)
for rows.Next() {
var tagName string
var tagId int
var count int
if err = rows.Scan(&tagName, &tagId, &count); err != nil {
if err = rows.Scan(&tagName, &count); err != nil {
return nil, nil, err
}
// Use tagId as second Map-Key component to differentiate tags with identical names
counts[fmt.Sprint(tagName, tagId)] = count
counts[tagName] = count
}
err = rows.Err()
return tags, counts, err
return
}
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
// If such a tag does not yet exist, it is created.
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
}
writable, err := r.checkScopeAuth(user, "write", tagScope)
if err != nil {
return 0, err
}
if !writable {
return 0, fmt.Errorf("cannot write tag scope with current authorization")
}
tagId, exists := r.TagId(tagType, tagName, tagScope)
func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName string) (tagId int64, err error) {
tagId, exists := r.TagId(tagType, tagName)
if !exists {
tagId, err = r.CreateTag(tagType, tagName, tagScope)
tagId, err = r.CreateTag(tagType, tagName)
if err != nil {
return 0, err
}
}
if _, err := r.AddTag(user, jobId, tagId); err != nil {
if _, err := r.AddTag(jobId, tagId); err != nil {
return 0, err
}
@@ -292,78 +135,33 @@ func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType s
}
// TagId returns the database id of the tag with the specified type and name.
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
func (r *JobRepository) TagId(tagType string, tagName string) (tagId int64, exists bool) {
exists = true
if err := sq.Select("id").From("tag").
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
exists = false
}
return
}
// TagInfo looks up the type, name and scope of the tag with database id tagId.
// exists reports whether the row could be queried and scanned; it is false
// whenever Scan returns an error.
func (r *JobRepository) TagInfo(tagId int64) (tagType string, tagName string, tagScope string, exists bool) {
	row := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").
		From("tag").
		Where("tag.id = ?", tagId).
		RunWith(r.stmtCache).
		QueryRow()

	scanErr := row.Scan(&tagType, &tagName, &tagScope)
	exists = scanErr == nil
	return
}
// GetTags returns a list of all scoped tags if job is nil or of the tags that the job with that database ID has.
func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, error) {
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
// GetTags returns a list of all tags if job is nil or of the tags that the job with that database ID has.
func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
q := sq.Select("id", "tag_type", "tag_name").From("tag")
if job != nil {
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
}
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
s, _, _ := q.ToSql()
log.Errorf("Error get tags with %s: %v", s, err)
log.Error("Error while running query")
return nil, err
}
tags := make([]*schema.Tag, 0)
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
readable, err := r.checkScopeAuth(user, "read", tag.Scope)
if err != nil {
return nil, err
}
if readable {
tags = append(tags, tag)
}
}
return tags, nil
}
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
if job != nil {
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
}
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
s, _, _ := q.ToSql()
log.Errorf("Error get tags with %s: %v", s, err)
return nil, err
}
tags := make([]*schema.Tag, 0)
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
@@ -372,59 +170,3 @@ func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
return tags, nil
}
// ImportTag attaches the tag (tagType, tagName, tagScope) to the job with
// database id jobId, creating the tag first if TagId does not find it.
//
// Import has no scope context: it only writes from the metafile to the DB, no
// archive update is triggered, and only an error is returned.
func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
	id, known := r.TagId(tagType, tagName, tagScope)
	if !known {
		if id, err = r.CreateTag(tagType, tagName, tagScope); err != nil {
			return err
		}
	}

	link := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, id)
	if _, err = link.RunWith(r.stmtCache).Exec(); err != nil {
		stmt, _, _ := link.ToSql()
		log.Errorf("Error adding tag on import with %s: %v", stmt, err)
		return err
	}

	return nil
}
// checkScopeAuth decides whether user may perform operation ("read" or
// "write") on a tag with the given scope.
//
// Rules, evaluated in this order:
//   - write "admin":   admin role, or a user whose only role is api
//   - write "global":  admin or support role, or a user whose only role is api
//   - write own scope: allowed (scope equals the username)
//   - read  "admin":   admin or support role
//   - read  "global" or own scope: allowed
//   - any other scope with "read"/"write": denied without error
//
// An error is returned when user is nil, or when no rule matched and operation
// is neither "read" nor "write".
func (r *JobRepository) checkScopeAuth(user *schema.User, operation string, scope string) (pass bool, err error) {
	if user == nil {
		return false, fmt.Errorf("error while checking tag operation auth: no user in context")
	}

	writing := operation == "write"
	reading := operation == "read"

	switch {
	case writing && scope == "admin":
		return user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)), nil
	case writing && scope == "global":
		return user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)), nil
	case writing && scope == user.Username:
		return true, nil
	case reading && scope == "admin":
		return user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}), nil
	case reading && (scope == "global" || scope == user.Username):
		return true, nil
	case reading || writing:
		// No acceptable scope: deny tag
		return false, nil
	default:
		return false, fmt.Errorf("error while checking tag operation auth: unknown operation (%s)", operation)
	}
}

Binary file not shown.

BIN
internal/repository/testdata/job.db-shm vendored Normal file

Binary file not shown.

View File

@@ -1,4 +1,4 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -6,6 +6,7 @@ package repository
import (
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/jmoiron/sqlx"
)
@@ -17,12 +18,20 @@ type Transaction struct {
// TransactionInit begins a database transaction and prepares the named job
// insert statement (NamedJobInsert) on it.
//
// It returns the Transaction holding both, or an error if either step fails.
// If preparing the statement fails, the transaction that was just opened is
// rolled back before returning so it does not stay open.
func (r *JobRepository) TransactionInit() (*Transaction, error) {
	var err error
	t := new(Transaction)

	// Inserts are bundled into transactions because in sqlite,
	// that speeds up inserts A LOT.
	t.tx, err = r.DB.Beginx()
	if err != nil {
		log.Warn("Error while bundling transactions")
		return nil, err
	}

	t.stmt, err = t.tx.PrepareNamed(NamedJobInsert)
	if err != nil {
		log.Warn("Error while preparing namedJobInsert")
		// The caller only receives nil, so nobody else could ever finish this
		// transaction: roll it back here instead of leaking it.
		if rbErr := t.tx.Rollback(); rbErr != nil {
			log.Warnf("Error while rolling back transaction: %v", rbErr)
		}
		return nil, err
	}

	return t, nil
}
@@ -41,6 +50,7 @@ func (r *JobRepository) TransactionCommit(t *Transaction) error {
return err
}
t.stmt = t.tx.NamedStmt(t.stmt)
return nil
}
@@ -49,17 +59,14 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error {
log.Warn("Error while committing SQL transactions")
return err
}
return nil
}
func (r *JobRepository) TransactionAddNamed(
t *Transaction,
query string,
args ...interface{},
) (int64, error) {
res, err := t.tx.NamedExec(query, args)
func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, error) {
res, err := t.stmt.Exec(job)
if err != nil {
log.Errorf("Named Exec failed: %v", err)
log.Errorf("repository initDB(): %v", err)
return 0, err
}
@@ -72,19 +79,26 @@ func (r *JobRepository) TransactionAddNamed(
return id, nil
}
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
res, err := t.tx.Exec(query, args...)
func (r *JobRepository) TransactionAddTag(t *Transaction, tag *schema.Tag) (int64, error) {
res, err := t.tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
if err != nil {
log.Errorf("TransactionAdd(), Exec() Error: %v", err)
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
return 0, err
}
tagId, err := res.LastInsertId()
if err != nil {
log.Warn("Error while getting last insert ID")
return 0, err
}
id, err := res.LastInsertId()
if err != nil {
log.Errorf("TransactionAdd(), LastInsertId() Error: %v", err)
return 0, err
}
return id, nil
return tagId, nil
}
// TransactionSetTag links tag tagId to job jobId by inserting a row into the
// jobtag table inside the transaction t. It returns the error from Exec, if any.
func (r *JobRepository) TransactionSetTag(t *Transaction, jobId int64, tagId int64) error {
	_, execErr := t.tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, jobId, tagId)
	if execErr == nil {
		return nil
	}

	log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", jobId, tagId)
	return execErr
}

Some files were not shown because too many files have changed in this diff Show More