Merge branch 'master' into import-data-sanitation

This commit is contained in:
Jan Eitzinger
2023-04-07 08:57:42 +02:00
110 changed files with 8191 additions and 1464 deletions

View File

@@ -1,6 +1,4 @@
// Package api GENERATED BY SWAG; DO NOT EDIT
// This file was generated by swaggo/swag at
// 2022-09-22 13:31:53.353204065 +0200 CEST m=+0.139444562
// Code generated by swaggo/swag. DO NOT EDIT
package api
import "github.com/swaggo/swag"
@@ -11,7 +9,6 @@ const docTemplate = `{
"info": {
"description": "{{escape .Description}}",
"title": "{{.Title}}",
"termsOfService": "https://monitoring.nhr.fau.de/imprint",
"contact": {
"name": "ClusterCockpit Project",
"url": "https://github.com/ClusterCockpit",
@@ -34,12 +31,12 @@ const docTemplate = `{
}
],
"description": "Get a list of all jobs. Filters can be applied using query parameters.\nNumber of results can be limited by page. Results are sorted by descending startTime.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"query"
],
"summary": "Lists all jobs",
"parameters": [
{
@@ -70,13 +67,13 @@ const docTemplate = `{
},
{
"type": "integer",
"description": "Items per page (If empty: No Limit)",
"description": "Items per page (Default: 25)",
"name": "items-per-page",
"in": "query"
},
{
"type": "integer",
"description": "Page Number (If empty: No Paging)",
"description": "Page Number (Default: 1)",
"name": "page",
"in": "query"
},
@@ -89,12 +86,9 @@ const docTemplate = `{
],
"responses": {
"200": {
"description": "Array of matching jobs",
"description": "Job array and page info",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.Job"
}
"$ref": "#/definitions/api.GetJobsApiResponse"
}
},
"400": {
@@ -109,6 +103,227 @@ const docTemplate = `{
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/delete_job/": {
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to delete is specified by request body. All fields are required in this case.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"remove"
],
"summary": "Remove a job from the sql database",
"parameters": [
{
"description": "All fields required",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.DeleteJobApiRequest"
}
}
],
"responses": {
"200": {
"description": "Success message",
"schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Resource not found",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/delete_job/{id}": {
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to remove is specified by database ID. This will not remove the job from the job archive.",
"produces": [
"application/json"
],
"tags": [
"remove"
],
"summary": "Remove a job from the sql database",
"parameters": [
{
"type": "integer",
"description": "Database ID of Job",
"name": "id",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Success message",
"schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Resource not found",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/delete_job_before/{ts}": {
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.",
"produces": [
"application/json"
],
"tags": [
"remove"
],
"summary": "Remove a job from the sql database",
"parameters": [
{
"type": "integer",
"description": "Unix epoch timestamp",
"name": "ts",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Success message",
"schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Resource not found",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
@@ -132,6 +347,9 @@ const docTemplate = `{
"produces": [
"application/json"
],
"tags": [
"add and modify"
],
"summary": "Adds a new job as \"running\"",
"parameters": [
{
@@ -195,6 +413,9 @@ const docTemplate = `{
"produces": [
"application/json"
],
"tags": [
"add and modify"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
{
@@ -209,7 +430,7 @@ const docTemplate = `{
],
"responses": {
"200": {
"description": "Job resource",
"description": "Success message",
"schema": {
"$ref": "#/definitions/schema.JobMeta"
}
@@ -267,6 +488,9 @@ const docTemplate = `{
"produces": [
"application/json"
],
"tags": [
"add and modify"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
{
@@ -346,6 +570,9 @@ const docTemplate = `{
"produces": [
"application/json"
],
"tags": [
"add and modify"
],
"summary": "Adds one or more tags to a job",
"parameters": [
{
@@ -363,7 +590,7 @@ const docTemplate = `{
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/api.Tag"
"$ref": "#/definitions/api.ApiTag"
}
}
}
@@ -404,8 +631,53 @@ const docTemplate = `{
}
},
"definitions": {
"api.ApiTag": {
"type": "object",
"properties": {
"name": {
"description": "Tag Name",
"type": "string",
"example": "Testjob"
},
"type": {
"description": "Tag Type",
"type": "string",
"example": "Debug"
}
}
},
"api.DeleteJobApiRequest": {
"type": "object",
"required": [
"jobId"
],
"properties": {
"cluster": {
"description": "Cluster of job",
"type": "string",
"example": "fritz"
},
"jobId": {
"description": "Cluster Job ID of job",
"type": "integer",
"example": 123000
},
"startTime": {
"description": "Start Time of job as epoch",
"type": "integer",
"example": 1649723812
}
}
},
"api.DeleteJobApiResponse": {
"type": "object",
"properties": {
"msg": {
"type": "string"
}
}
},
"api.ErrorResponse": {
"description": "Error message as returned from backend.",
"type": "object",
"properties": {
"error": {
@@ -418,8 +690,27 @@ const docTemplate = `{
}
}
},
"api.GetJobsApiResponse": {
"type": "object",
"properties": {
"items": {
"description": "Number of jobs returned",
"type": "integer"
},
"jobs": {
"description": "Array of jobs",
"type": "array",
"items": {
"$ref": "#/definitions/schema.JobMeta"
}
},
"page": {
"description": "Page id returned",
"type": "integer"
}
}
},
"api.StartJobApiResponse": {
"description": "Successful job start response with database id of new job.",
"type": "object",
"properties": {
"id": {
@@ -429,7 +720,6 @@ const docTemplate = `{
}
},
"api.StopJobApiRequest": {
"description": "Request to stop running job using stoptime and final state. They are only required if no database id was provided with endpoint.",
"type": "object",
"required": [
"jobState",
@@ -447,14 +737,11 @@ const docTemplate = `{
"example": 123000
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout"
"description": "Final job state",
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
}
],
"example": "completed"
},
@@ -470,22 +757,6 @@ const docTemplate = `{
}
}
},
"api.Tag": {
"description": "Defines a tag using name and type.",
"type": "object",
"properties": {
"name": {
"description": "Tag Name",
"type": "string",
"example": "Testjob"
},
"type": {
"description": "Tag Type",
"type": "string",
"example": "Debug"
}
}
},
"schema.Job": {
"description": "Information of a HPC job.",
"type": "object",
@@ -524,14 +795,10 @@ const docTemplate = `{
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
}
],
"example": "completed"
},
@@ -656,14 +923,10 @@ const docTemplate = `{
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
}
],
"example": "completed"
},
@@ -759,6 +1022,29 @@ const docTemplate = `{
}
}
},
"schema.JobState": {
"type": "string",
"enum": [
"running",
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"preempted",
"out_of_memory"
],
"x-enum-varnames": [
"JobStateRunning",
"JobStateCompleted",
"JobStateFailed",
"JobStateCancelled",
"JobStateStopped",
"JobStateTimeout",
"JobStatePreempted",
"JobStateOutOfMemory"
]
},
"schema.JobStatistics": {
"description": "Specification for job metric statistics.",
"type": "object",
@@ -839,22 +1125,26 @@ const docTemplate = `{
},
"securityDefinitions": {
"ApiKeyAuth": {
"description": "JWT based authentification for general API endpoint use.",
"type": "apiKey",
"name": "X-Auth-Token",
"in": "header"
}
}
},
"tags": [
{
"name": "Job API"
}
]
}`
// SwaggerInfo holds exported Swagger Info so clients can modify it
var SwaggerInfo = &swag.Spec{
Version: "0.1.0",
Host: "clustercockpit.localhost:8082",
Version: "0.2.0",
Host: "localhost:8080",
BasePath: "/api",
Schemes: []string{},
Title: "ClusterCockpit REST API",
Description: "Defines a tag using name and type.",
Description: "API for batch job control.",
InfoInstanceName: "swagger",
SwaggerTemplate: docTemplate,
}
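
The paginated GET /jobs/ endpoint now wraps its results in api.GetJobsApiResponse instead of a bare job array. A minimal client sketch against the new defaults from this commit (host localhost:8080, base path /api, 25 items on page 1, X-Auth-Token header); the token value and the local getJobsResponse mirror type are placeholders, not part of the commit:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// Mirrors api.GetJobsApiResponse; job objects are kept raw here.
type getJobsResponse struct {
	Jobs  []json.RawMessage `json:"jobs"`  // array of schema.JobMeta
	Items int               `json:"items"` // number of jobs returned
	Page  int               `json:"page"`  // page id returned
}

func main() {
	req, err := http.NewRequest(http.MethodGet,
		"http://localhost:8080/api/jobs/?page=1&items-per-page=25", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("X-Auth-Token", "<JWT>") // placeholder: a JWT with the api role

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var payload getJobsResponse
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		panic(err)
	}
	fmt.Printf("page %d: %d jobs\n", payload.Page, payload.Items)
}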

View File

@@ -6,7 +6,6 @@ package api
import (
"bufio"
"context"
"database/sql"
"encoding/json"
"errors"
@@ -23,7 +22,6 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
@@ -32,9 +30,10 @@ import (
)
// @title ClusterCockpit REST API
// @version 0.1.0
// @version 0.2.0
// @description API for batch job control.
// @termsOfService https://monitoring.nhr.fau.de/imprint
// @tag.name Job API
// @contact.name ClusterCockpit Project
// @contact.url https://github.com/ClusterCockpit
@@ -43,20 +42,19 @@ import (
// @license.name MIT License
// @license.url https://opensource.org/licenses/MIT
// @host clustercockpit.localhost:8082
// @BasePath /api
// @host localhost:8080
// @basePath /api
// @securityDefinitions.apikey ApiKeyAuth
// @in header
// @name X-Auth-Token
// @description JWT based authentication for general API endpoint use.
type RestApi struct {
JobRepository *repository.JobRepository
Resolver *graph.Resolver
Authentication *auth.Authentication
MachineStateDir string
OngoingArchivings sync.WaitGroup
JobRepository *repository.JobRepository
Resolver *graph.Resolver
Authentication *auth.Authentication
MachineStateDir string
RepositoryMutex sync.Mutex
}
func (api *RestApi) MountRoutes(r *mux.Router) {
@@ -72,9 +70,13 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
// r.HandleFunc("/jobs/{id}", api.getJob).Methods(http.MethodGet)
r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch)
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
r.HandleFunc("/jobs/delete_job/", api.deleteJobByRequest).Methods(http.MethodDelete)
r.HandleFunc("/jobs/delete_job/{id}", api.deleteJobById).Methods(http.MethodDelete)
r.HandleFunc("/jobs/delete_job_before/{ts}", api.deleteJobBefore).Methods(http.MethodDelete)
if api.Authentication != nil {
r.HandleFunc("/jwt/", api.getJWT).Methods(http.MethodGet)
r.HandleFunc("/roles/", api.getRoles).Methods(http.MethodGet)
r.HandleFunc("/users/", api.createUser).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/users/", api.getUsers).Methods(http.MethodGet)
r.HandleFunc("/users/", api.deleteUser).Methods(http.MethodDelete)
@@ -89,44 +91,58 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
}
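// Example wiring for MountRoutes (a hypothetical main, not part of this
// commit), matching the documented base path and host:
//
//	r := mux.NewRouter()
//	restApi.MountRoutes(r.PathPrefix("/api").Subrouter())
//	log.Fatal(http.ListenAndServe("localhost:8080", r))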
// StartJobApiResponse model
// @Description Successful job start response with database id of new job.
type StartJobApiResponse struct {
// Database ID of new job
DBID int64 `json:"id"`
}
// DeleteJobApiResponse model
type DeleteJobApiResponse struct {
Message string `json:"msg"`
}
// StopJobApiRequest model
// @Description Request to stop running job using stoptime and final state.
// @Description They are only required if no database id was provided with endpoint.
type StopJobApiRequest struct {
// Stop Time of job as epoch
StopTime int64 `json:"stopTime" validate:"required" example:"1649763839"`
State schema.JobState `json:"jobState" validate:"required" example:"completed" enums:"completed,failed,cancelled,stopped,timeout"` // Final job state
JobId *int64 `json:"jobId" example:"123000"` // Cluster Job ID of job
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
State schema.JobState `json:"jobState" validate:"required" example:"completed"` // Final job state
JobId *int64 `json:"jobId" example:"123000"` // Cluster Job ID of job
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
}
// DeleteJobApiRequest model
type DeleteJobApiRequest struct {
JobId *int64 `json:"jobId" validate:"required" example:"123000"` // Cluster Job ID of job
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
}
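// Example request body, built from the field examples above:
//
//	{"jobId": 123000, "cluster": "fritz", "startTime": 1649723812}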
// GetJobsApiResponse model
type GetJobsApiResponse struct {
Jobs []*schema.JobMeta `json:"jobs"` // Array of jobs
Items int `json:"items"` // Number of jobs returned
Page int `json:"page"` // Page id returned
}
// ErrorResponse model
// @Description Error message as returned from backend.
type ErrorResponse struct {
// Statustext of Errorcode
Status string `json:"status"`
Error string `json:"error"` // Error Message
}
// Tag model
// @Description Defines a tag using name and type.
type Tag struct {
// ApiTag model
type ApiTag struct {
// Tag Type
Type string `json:"type" example:"Debug"`
Name string `json:"name" example:"Testjob"` // Tag Name
}
type TagJobApiRequest []*Tag
type TagJobApiRequest []*ApiTag
func handleError(err error, statusCode int, rw http.ResponseWriter) {
log.Warnf("REST API: %s", err.Error())
log.Warnf("REST ERROR : %s", err.Error())
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(statusCode)
json.NewEncoder(rw).Encode(ErrorResponse{
@@ -142,35 +158,43 @@ func decode(r io.Reader, val interface{}) error {
}
// getJobs godoc
// @Summary Lists all jobs
// @Description Get a list of all jobs. Filters can be applied using query parameters.
// @Description Number of results can be limited by page. Results are sorted by descending startTime.
// @Accept json
// @Produce json
// @Param state query string false "Job State" Enums(running, completed, failed, cancelled, stopped, timeout)
// @Param cluster query string false "Job Cluster"
// @Param start-time query string false "Syntax: '$from-$to', as unix epoch timestamps in seconds"
// @Param items-per-page query int false "Items per page (If empty: No Limit)"
// @Param page query int false "Page Number (If empty: No Paging)"
// @Param with-metadata query bool false "Include metadata (e.g. jobScript) in response"
// @Success 200 {array} schema.Job "Array of matching jobs"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/ [get]
// @summary Lists all jobs
// @tags query
// @description Get a list of all jobs. Filters can be applied using query parameters.
// @description Number of results can be limited by page. Results are sorted by descending startTime.
// @produce json
// @param state query string false "Job State" Enums(running, completed, failed, cancelled, stopped, timeout)
// @param cluster query string false "Job Cluster"
// @param start-time query string false "Syntax: '$from-$to', as unix epoch timestamps in seconds"
// @param items-per-page query int false "Items per page (Default: 25)"
// @param page query int false "Page Number (Default: 1)"
// @param with-metadata query bool false "Include metadata (e.g. jobScript) in response"
// @success 200 {object} api.GetJobsApiResponse "Job array and page info"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /jobs/ [get]
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %v", auth.GetRoleString(auth.RoleApi)), http.StatusForbidden, rw)
return
}
withMetadata := false
filter := &model.JobFilter{}
page := &model.PageRequest{ItemsPerPage: -1, Page: 1}
page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc}
for key, vals := range r.URL.Query() {
switch key {
case "state":
for _, s := range vals {
state := schema.JobState(s)
if !state.Valid() {
http.Error(rw, "invalid query parameter value: state", http.StatusBadRequest)
handleError(fmt.Errorf("invalid query parameter value: state"),
http.StatusBadRequest, rw)
return
}
filter.State = append(filter.State, state)
@@ -180,17 +204,18 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
case "start-time":
st := strings.Split(vals[0], "-")
if len(st) != 2 {
http.Error(rw, "invalid query parameter value: startTime", http.StatusBadRequest)
handleError(fmt.Errorf("invalid query parameter value: startTime"),
http.StatusBadRequest, rw)
return
}
from, err := strconv.ParseInt(st[0], 10, 64)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(err, http.StatusBadRequest, rw)
return
}
to, err := strconv.ParseInt(st[1], 10, 64)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(err, http.StatusBadRequest, rw)
return
}
ufrom, uto := time.Unix(from, 0), time.Unix(to, 0)
@@ -198,28 +223,29 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
case "page":
x, err := strconv.Atoi(vals[0])
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(err, http.StatusBadRequest, rw)
return
}
page.Page = x
case "items-per-page":
x, err := strconv.Atoi(vals[0])
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(err, http.StatusBadRequest, rw)
return
}
page.ItemsPerPage = x
case "with-metadata":
withMetadata = true
default:
http.Error(rw, "invalid query parameter: "+key, http.StatusBadRequest)
handleError(fmt.Errorf("invalid query parameter: %s", key),
http.StatusBadRequest, rw)
return
}
}
jobs, err := api.JobRepository.QueryJobs(r.Context(), []*model.JobFilter{filter}, page, order)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(err, http.StatusInternalServerError, rw)
return
}
@@ -227,7 +253,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
for _, job := range jobs {
if withMetadata {
if _, err := api.JobRepository.FetchMetadata(job); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(err, http.StatusInternalServerError, rw)
return
}
}
@@ -240,7 +266,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
res.Tags, err = api.JobRepository.GetTags(&job.ID)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(err, http.StatusInternalServerError, rw)
return
}
@@ -248,7 +274,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
res.Statistics, err = archive.GetStatistics(job)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(err, http.StatusInternalServerError, rw)
return
}
@@ -258,32 +284,44 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
}
log.Debugf("/api/jobs: %d jobs returned", len(results))
rw.Header().Add("Content-Type", "application/json")
bw := bufio.NewWriter(rw)
defer bw.Flush()
if err := json.NewEncoder(bw).Encode(map[string]interface{}{
"jobs": results,
}); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
payload := GetJobsApiResponse{
Jobs: results,
Items: page.ItemsPerPage,
Page: page.Page,
}
if err := json.NewEncoder(bw).Encode(payload); err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
}
}
// tagJob godoc
// @Summary Adds one or more tags to a job
// @Description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
// @Description If tagged job is already finished: Tag will be written directly to respective archive files.
// @Accept json
// @Produce json
// @Param id path int true "Job Database ID"
// @Param request body api.TagJobApiRequest true "Array of tag-objects to add"
// @Success 200 {object} schema.Job "Updated job resource"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 404 {object} api.ErrorResponse "Job or tag does not exist"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/tag_job/{id} [post]
// @summary Adds one or more tags to a job
// @tags add and modify
// @description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
// @description If tagged job is already finished: Tag will be written directly to respective archive files.
// @accept json
// @produce json
// @param id path int true "Job Database ID"
// @param request body api.TagJobApiRequest true "Array of tag-objects to add"
// @success 200 {object} schema.Job "Updated job resource"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 404 {object} api.ErrorResponse "Job or tag does not exist"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /jobs/tag_job/{id} [post]
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %v", auth.GetRoleString(auth.RoleApi)), http.StatusForbidden, rw)
return
}
iid, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
@@ -328,23 +366,24 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
}
// startJob godoc
// @Summary Adds a new job as "running"
// @Description Job specified in request body will be saved to database as "running" with new DB ID.
// @Description Job specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.
// @Accept json
// @Produce json
// @Param request body schema.JobMeta true "Job to add"
// @Success 201 {object} api.StartJobApiResponse "Job added successfully"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 403 {object} api.ErrorResponse "Forbidden"
// @Failure 422 {object} api.ErrorResponse "Unprocessable Entity: The combination of jobId, clusterId and startTime does already exist"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/start_job/ [post]
// @summary Adds a new job as "running"
// @tags add and modify
// @description Job specified in request body will be saved to database as "running" with new DB ID.
// @description Job specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.
// @accept json
// @produce json
// @param request body schema.JobMeta true "Job to add"
// @success 201 {object} api.StartJobApiResponse "Job added successfully"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: The combination of jobId, clusterId and startTime does already exist"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /jobs/start_job/ [post]
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.GetRoleString(auth.RoleApi)), http.StatusForbidden, rw)
return
}
@@ -362,15 +401,22 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
return
}
// acquire lock to avoid race condition between API calls
var unlockOnce sync.Once
api.RepositoryMutex.Lock()
defer unlockOnce.Do(api.RepositoryMutex.Unlock)
// Check if combination of (job_id, cluster_id, start_time) already exists:
job, err := api.JobRepository.Find(&req.JobID, &req.Cluster, nil)
jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil)
if err != nil && err != sql.ErrNoRows {
handleError(fmt.Errorf("checking for duplicate failed: %w", err), http.StatusInternalServerError, rw)
return
} else if err == nil {
if (req.StartTime - job.StartTimeUnix) < 86400 {
handleError(fmt.Errorf("a job with that jobId, cluster and startTime already exists: dbid: %d", job.ID), http.StatusUnprocessableEntity, rw)
return
for _, job := range jobs {
if (req.StartTime - job.StartTimeUnix) < 86400 {
handleError(fmt.Errorf("a job with that jobId, cluster and startTime already exists: dbid: %d", job.ID), http.StatusUnprocessableEntity, rw)
return
}
}
}
@@ -379,6 +425,8 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
handleError(fmt.Errorf("insert into database failed: %w", err), http.StatusInternalServerError, rw)
return
}
// unlock here, adding Tags can be async
unlockOnce.Do(api.RepositoryMutex.Unlock)
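// The sync.Once guard makes this early unlock and the deferred unlock at
// the top of the function safe together: whichever runs first wins, and
// the other becomes a no-op instead of unlocking the mutex twice.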
for _, tag := range req.Tags {
if _, err := api.JobRepository.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
@@ -397,25 +445,26 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
}
// stopJobById godoc
// @Summary Marks job as completed and triggers archiving
// @Description Job to stop is specified by database ID. Only stopTime and final state are required in request body.
// @Description Returns full job resource information according to 'JobMeta' scheme.
// @Accept json
// @Produce json
// @Param id path int true "Database ID of Job"
// @Param request body api.StopJobApiRequest true "stopTime and final state in request body"
// @Success 200 {object} schema.JobMeta "Job resource"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 403 {object} api.ErrorResponse "Forbidden"
// @Failure 404 {object} api.ErrorResponse "Resource not found"
// @Failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/stop_job/{id} [post]
// @summary Marks job as completed and triggers archiving
// @tags add and modify
// @description Job to stop is specified by database ID. Only stopTime and final state are required in request body.
// @description Returns full job resource information according to 'JobMeta' scheme.
// @accept json
// @produce json
// @param id path int true "Database ID of Job"
// @param request body api.StopJobApiRequest true "stopTime and final state in request body"
// @success 200 {object} schema.JobMeta "Job resource"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 404 {object} api.ErrorResponse "Resource not found"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /jobs/stop_job/{id} [post]
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.GetRoleString(auth.RoleApi)), http.StatusForbidden, rw)
return
}
@@ -451,23 +500,24 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
}
// stopJobByRequest godoc
// @Summary Marks job as completed and triggers archiving
// @Description Job to stop is specified by request body. All fields are required in this case.
// @Description Returns full job resource information according to 'JobMeta' scheme.
// @Produce json
// @Param request body api.StopJobApiRequest true "All fields required"
// @Success 200 {object} schema.JobMeta "Job resource"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 403 {object} api.ErrorResponse "Forbidden"
// @Failure 404 {object} api.ErrorResponse "Resource not found"
// @Failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/stop_job/ [post]
// @summary Marks job as completed and triggers archiving
// @tags add and modify
// @description Job to stop is specified by request body. All fields are required in this case.
// @description Returns full job resource information according to 'JobMeta' scheme.
// @produce json
// @param request body api.StopJobApiRequest true "All fields required"
// @success 200 {object} schema.JobMeta "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 404 {object} api.ErrorResponse "Resource not found"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /jobs/stop_job/ [post]
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
handleError(fmt.Errorf("missing role: %v", auth.GetRoleString(auth.RoleApi)), http.StatusForbidden, rw)
return
}
@@ -496,6 +546,159 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
api.checkAndHandleStopJob(rw, job, req)
}
// deleteJobById godoc
// @summary Remove a job from the sql database
// @tags remove
// @description Job to remove is specified by database ID. This will not remove the job from the job archive.
// @produce json
// @param id path int true "Database ID of Job"
// @success 200 {object} api.DeleteJobApiResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 404 {object} api.ErrorResponse "Resource not found"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /jobs/delete_job/{id} [delete]
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %v", auth.GetRoleString(auth.RoleApi)), http.StatusForbidden, rw)
return
}
// Fetch job (that will be stopped) from db
id, ok := mux.Vars(r)["id"]
var err error
if ok {
id, e := strconv.ParseInt(id, 10, 64)
if e != nil {
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
return
}
err = api.JobRepository.DeleteJobById(id)
} else {
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
return
}
if err != nil {
handleError(fmt.Errorf("deleting job failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
Message: fmt.Sprintf("Successfully deleted job %s", id),
})
}
// deleteJobByRequest godoc
// @summary Remove a job from the sql database
// @tags remove
// @description Job to delete is specified by request body. All fields are required in this case.
// @accept json
// @produce json
// @param request body api.DeleteJobApiRequest true "All fields required"
// @success 200 {object} api.DeleteJobApiResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 404 {object} api.ErrorResponse "Resource not found"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /jobs/delete_job/ [delete]
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %v", auth.GetRoleString(auth.RoleApi)), http.StatusForbidden, rw)
return
}
// Parse request body
req := DeleteJobApiRequest{}
if err := decode(r.Body, &req); err != nil {
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
return
}
// Fetch job (that will be deleted) from db
var job *schema.Job
var err error
if req.JobId == nil {
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
return
}
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
err = api.JobRepository.DeleteJobById(job.ID)
if err != nil {
handleError(fmt.Errorf("deleting job failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
})
}
// deleteJobBefore godoc
// @summary Remove jobs from the sql database
// @tags remove
// @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
// @produce json
// @param ts path int true "Unix epoch timestamp"
// @success 200 {object} api.DeleteJobApiResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 404 {object} api.ErrorResponse "Resource not found"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /jobs/delete_job_before/{ts} [delete]
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %v", auth.GetRoleString(auth.RoleApi)), http.StatusForbidden, rw)
return
}
var cnt int
// Fetch job (that will be stopped) from db
id, ok := mux.Vars(r)["ts"]
var err error
if ok {
ts, e := strconv.ParseInt(id, 10, 64)
if e != nil {
handleError(fmt.Errorf("integer expected in path for ts: %w", e), http.StatusBadRequest, rw)
return
}
cnt, err = api.JobRepository.DeleteJobsBefore(ts)
} else {
handleError(errors.New("the parameter 'ts' is required"), http.StatusBadRequest, rw)
return
}
if err != nil {
handleError(fmt.Errorf("deleting jobs failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
})
}
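// Example (hypothetical client call, not part of this commit):
//
//	DELETE /api/jobs/delete_job_before/1649723812
//	X-Auth-Token: <JWT with api role>
//
// removes all jobs with a start time before the given epoch timestamp and
// answers with {"msg": "Successfully deleted N jobs"}.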
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
// Sanity checks
@@ -507,7 +710,7 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
if req.State != "" && !req.State.Valid() {
handleError(fmt.Errorf("invalid job state: %#v", req.State), http.StatusBadRequest, rw)
return
} else {
} else if req.State == "" {
req.State = schema.JobStateCompleted
}
@@ -533,59 +736,10 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
return
}
// We need to start a new goroutine as this function needs to return
// for the response to be flushed to the client.
api.OngoingArchivings.Add(1) // So that a shutdown does not interrupt this goroutine.
go func() {
defer api.OngoingArchivings.Done()
if _, err := api.JobRepository.FetchMetadata(job); err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
api.JobRepository.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
return
}
// metricdata.ArchiveJob will fetch all the data from a MetricDataRepository and create meta.json/data.json files
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
if err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
api.JobRepository.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
return
}
// Update the jobs database entry one last time:
if err := api.JobRepository.Archive(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
return
}
log.Printf("archiving job (dbid: %d) successful", job.ID)
}()
// Trigger async archiving
api.JobRepository.TriggerArchiving(job)
}
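// The removed inline goroutine above is replaced by a single repository
// call. A minimal sketch of how such a trigger could be structured,
// assuming hypothetical names (archivePending, archiveWorker) that are
// not part of this commit:
//
//	func (r *JobRepository) TriggerArchiving(job *schema.Job) {
//		r.archivePending <- job // hand the job to a background worker
//	}
//
//	func (r *JobRepository) archiveWorker() {
//		for job := range r.archivePending {
//			// fetch metadata, write the archive files and update the
//			// job's monitoring status, as the old goroutine did inline
//		}
//	}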
// func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
// if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
// handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
// return
// }
// var body struct {
// Meta *schema.JobMeta `json:"meta"`
// Data *schema.JobData `json:"data"`
// }
// if err := decode(r.Body, &body); err != nil {
// handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusBadRequest, rw)
// return
// }
// if err := api.JobRepository.ImportJob(body.Meta, body.Data); err != nil {
// handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusUnprocessableEntity, rw)
// return
// }
// rw.Write([]byte(`{ "status": "OK" }`))
// }
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
id := mux.Vars(r)["id"]
metrics := r.URL.Query()["metric"]
@@ -634,7 +788,8 @@ func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
me := auth.GetUser(r.Context())
if !me.HasRole(auth.RoleAdmin) {
if username != me.Username {
http.Error(rw, "only admins are allowed to sign JWTs not for themselves", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to sign JWTs not for themselves",
http.StatusForbidden)
return
}
}
@@ -659,13 +814,21 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
rw.Header().Set("Content-Type", "text/plain")
me := auth.GetUser(r.Context())
if !me.HasRole(auth.RoleAdmin) {
http.Error(rw, "only admins are allowed to create new users", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to create new users", http.StatusForbidden)
return
}
username, password, role, name, email := r.FormValue("username"), r.FormValue("password"), r.FormValue("role"), r.FormValue("name"), r.FormValue("email")
if len(password) == 0 && role != auth.RoleApi {
http.Error(rw, "only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
username, password, role, name, email, project := r.FormValue("username"), r.FormValue("password"), r.FormValue("role"), r.FormValue("name"), r.FormValue("email"), r.FormValue("project")
if len(password) == 0 && role != auth.GetRoleString(auth.RoleApi) {
http.Error(rw, "Only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
return
}
if len(project) != 0 && role != auth.GetRoleString(auth.RoleManager) {
http.Error(rw, "only managers require a project (can be changed later)", http.StatusBadRequest)
return
} else if len(project) == 0 && role == auth.GetRoleString(auth.RoleManager) {
http.Error(rw, "managers require a project to manage (can be changed later)", http.StatusBadRequest)
return
}
@@ -674,17 +837,18 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
Name: name,
Password: password,
Email: email,
Projects: []string{project},
Roles: []string{role}}); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
}
rw.Write([]byte(fmt.Sprintf("User %#v successfully created!\n", username)))
rw.Write([]byte(fmt.Sprintf("User %v successfully created!\n", username)))
}
func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
http.Error(rw, "only admins are allowed to delete a user", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to delete a user", http.StatusForbidden)
return
}
@@ -699,7 +863,7 @@ func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
http.Error(rw, "only admins are allowed to fetch a list of users", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to fetch a list of users", http.StatusForbidden)
return
}
@@ -712,15 +876,33 @@ func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
json.NewEncoder(rw).Encode(users)
}
func (api *RestApi) getRoles(rw http.ResponseWriter, r *http.Request) {
user := auth.GetUser(r.Context())
if !user.HasRole(auth.RoleAdmin) {
http.Error(rw, "only admins are allowed to fetch a list of roles", http.StatusForbidden)
return
}
roles, err := auth.GetValidRoles(user)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
json.NewEncoder(rw).Encode(roles)
}
func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); !user.HasRole(auth.RoleAdmin) {
http.Error(rw, "only admins are allowed to update a user", http.StatusForbidden)
http.Error(rw, "Only admins are allowed to update a user", http.StatusForbidden)
return
}
// Get Values
newrole := r.FormValue("add-role")
delrole := r.FormValue("remove-role")
newproj := r.FormValue("add-project")
delproj := r.FormValue("remove-project")
// TODO: Handle anything but roles...
if newrole != "" {
@@ -735,8 +917,20 @@ func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
return
}
rw.Write([]byte("Remove Role Success"))
} else if newproj != "" {
if err := api.Authentication.AddProject(r.Context(), mux.Vars(r)["id"], newproj); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
}
rw.Write([]byte("Add Project Success"))
} else if delproj != "" {
if err := api.Authentication.RemoveProject(r.Context(), mux.Vars(r)["id"], delproj); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
}
rw.Write([]byte("Remove Project Success"))
} else {
http.Error(rw, "Not Add or Del?", http.StatusInternalServerError)
http.Error(rw, "Not Add or Del [role|project]?", http.StatusInternalServerError)
}
}
@@ -744,7 +938,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
rw.Header().Set("Content-Type", "text/plain")
key, value := r.FormValue("key"), r.FormValue("value")
fmt.Printf("KEY: %#v\nVALUE: %#v\n", key, value)
fmt.Printf("REST > KEY: %#v\nVALUE: %#v\n", key, value)
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, auth.GetUser(r.Context())); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
@@ -756,7 +950,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
if api.MachineStateDir == "" {
http.Error(rw, "not enabled", http.StatusNotFound)
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
return
}
@@ -787,7 +981,7 @@ func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) getMachineState(rw http.ResponseWriter, r *http.Request) {
if api.MachineStateDir == "" {
http.Error(rw, "not enabled", http.StatusNotFound)
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
return
}

View File

@@ -9,8 +9,10 @@ import (
"crypto/rand"
"encoding/base64"
"errors"
"fmt"
"net/http"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
@@ -18,32 +20,186 @@ import (
"github.com/jmoiron/sqlx"
)
const (
RoleAdmin string = "admin"
RoleSupport string = "support"
RoleApi string = "api"
RoleUser string = "user"
)
type AuthSource int
const (
AuthViaLocalPassword int8 = 0
AuthViaLDAP int8 = 1
AuthViaToken int8 = 2
AuthViaLocalPassword AuthSource = iota
AuthViaLDAP
AuthViaToken
)
type User struct {
Username string `json:"username"`
Password string `json:"-"`
Name string `json:"name"`
Roles []string `json:"roles"`
AuthSource int8 `json:"via"`
Email string `json:"email"`
Username string `json:"username"`
Password string `json:"-"`
Name string `json:"name"`
Roles []string `json:"roles"`
AuthSource AuthSource `json:"via"`
Email string `json:"email"`
Projects []string `json:"projects"`
Expiration time.Time
}
func (u *User) HasRole(role string) bool {
type Role int
const (
RoleAnonymous Role = iota
RoleApi
RoleUser
RoleManager
RoleSupport
RoleAdmin
RoleError
)
func GetRoleString(roleInt Role) string {
return [6]string{"anonymous", "api", "user", "manager", "support", "admin"}[roleInt]
}
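// Note: the string array above must stay in the declaration order of the
// Role constants; it has no entry for RoleError, so calling GetRoleString
// with RoleError (or any out-of-range value) panics.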
func getRoleEnum(roleStr string) Role {
switch strings.ToLower(roleStr) {
case "admin":
return RoleAdmin
case "support":
return RoleSupport
case "manager":
return RoleManager
case "user":
return RoleUser
case "api":
return RoleApi
case "anonymous":
return RoleAnonymous
default:
return RoleError
}
}
func isValidRole(role string) bool {
return getRoleEnum(role) != RoleError
}
func (u *User) HasValidRole(role string) (hasRole bool, isValid bool) {
if isValidRole(role) {
for _, r := range u.Roles {
if r == role {
return true, true
}
}
return false, true
}
return false, false
}
func (u *User) HasRole(role Role) bool {
for _, r := range u.Roles {
if r == role {
if r == GetRoleString(role) {
return true
}
}
return false
}
// Role-Arrays are short: performance not impacted by nested loop
func (u *User) HasAnyRole(queryroles []Role) bool {
for _, ur := range u.Roles {
for _, qr := range queryroles {
if ur == GetRoleString(qr) {
return true
}
}
}
return false
}
// Role-Arrays are short: performance not impacted by nested loop
func (u *User) HasAllRoles(queryroles []Role) bool {
target := len(queryroles)
matches := 0
for _, ur := range u.Roles {
for _, qr := range queryroles {
if ur == GetRoleString(qr) {
matches += 1
break
}
}
}
return matches == target
}
// Role-Arrays are short: performance not impacted by nested loop
func (u *User) HasNotRoles(queryroles []Role) bool {
matches := 0
for _, ur := range u.Roles {
for _, qr := range queryroles {
if ur == GetRoleString(qr) {
matches += 1
break
}
}
}
return matches == 0
}
// Called by API endpoint '/roles/' from the frontend: only required for the admin config view, hence the check for the admin role
func GetValidRoles(user *User) ([]string, error) {
var vals []string
if user.HasRole(RoleAdmin) {
for i := RoleApi; i < RoleError; i++ {
vals = append(vals, GetRoleString(i))
}
return vals, nil
}
return vals, fmt.Errorf("%s: only admins are allowed to fetch a list of roles", user.Username)
}
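// Note: the loop starts at RoleApi, so RoleAnonymous is never offered as
// an assignable role here.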
// Called by routerConfig web.page setup in the backend: requires a known user that is neither an API nor an anonymous user
func GetValidRolesMap(user *User) (map[string]Role, error) {
named := make(map[string]Role)
if user.HasNotRoles([]Role{RoleApi, RoleAnonymous}) {
for i := RoleApi; i < RoleError; i++ {
named[GetRoleString(i)] = i
}
return named, nil
}
return named, fmt.Errorf("Only known users are allowed to fetch a list of roles")
}
// Find highest role
func (u *User) GetAuthLevel() Role {
if u.HasRole(RoleAdmin) {
return RoleAdmin
} else if u.HasRole(RoleSupport) {
return RoleSupport
} else if u.HasRole(RoleManager) {
return RoleManager
} else if u.HasRole(RoleUser) {
return RoleUser
} else if u.HasRole(RoleApi) {
return RoleApi
} else if u.HasRole(RoleAnonymous) {
return RoleAnonymous
} else {
return RoleError
}
}
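// Example: a user with Roles ["user", "manager"] resolves to RoleManager,
// since the checks run from the highest role downwards.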
func (u *User) HasProject(project string) bool {
for _, p := range u.Projects {
if p == project {
return true
}
}
@@ -85,29 +241,20 @@ func Init(db *sqlx.DB,
configs map[string]interface{}) (*Authentication, error) {
auth := &Authentication{}
auth.db = db
_, err := db.Exec(`
CREATE TABLE IF NOT EXISTS user (
username varchar(255) PRIMARY KEY NOT NULL,
password varchar(255) DEFAULT NULL,
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
name varchar(255) DEFAULT NULL,
roles varchar(255) NOT NULL DEFAULT "[]",
email varchar(255) DEFAULT NULL);`)
if err != nil {
return nil, err
}
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Error("Error while initializing authentication -> decoding session key failed")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
@@ -115,12 +262,14 @@ func Init(db *sqlx.DB,
auth.LocalAuth = &LocalAuthenticator{}
if err := auth.LocalAuth.Init(auth, nil); err != nil {
log.Error("Error while initializing authentication -> localAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
auth.JwtAuth = &JWTAuthenticator{}
if err := auth.JwtAuth.Init(auth, configs["jwt"]); err != nil {
log.Error("Error while initializing authentication -> jwtAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.JwtAuth)
@@ -128,6 +277,7 @@ func Init(db *sqlx.DB,
if config, ok := configs["ldap"]; ok {
auth.LdapAuth = &LdapAuthenticator{}
if err := auth.LdapAuth.Init(auth, config); err != nil {
log.Error("Error while initializing authentication -> ldapAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
@@ -142,6 +292,7 @@ func (auth *Authentication) AuthViaSession(
session, err := auth.sessionStore.Get(r, "session")
if err != nil {
log.Error("Error while getting session store")
return nil, err
}
@@ -150,9 +301,11 @@ func (auth *Authentication) AuthViaSession(
}
username, _ := session.Values["username"].(string)
projects, _ := session.Values["projects"].([]string)
roles, _ := session.Values["roles"].([]string)
return &User{
Username: username,
Projects: projects,
Roles: roles,
AuthSource: -1,
}, nil
@@ -169,7 +322,7 @@ func (auth *Authentication) Login(
user := (*User)(nil)
if username != "" {
if user, _ = auth.GetUser(username); err != nil {
// log.Warnf("login of unkown user %#v", username)
// log.Warnf("login of unkown user %v", username)
_ = err
}
}
@@ -181,7 +334,7 @@ func (auth *Authentication) Login(
user, err = authenticator.Login(user, rw, r)
if err != nil {
log.Warnf("login failed: %s", err.Error())
log.Warnf("user '%s' login failed: %s", user.Username, err.Error())
onfailure(rw, r, err)
return
}
@@ -197,14 +350,15 @@ func (auth *Authentication) Login(
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
}
session.Values["username"] = user.Username
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
if err := auth.sessionStore.Save(r, rw, session); err != nil {
log.Errorf("session save failed: %s", err.Error())
log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
log.Infof("login successfull: user: %#v (roles: %v)", user.Username, user.Roles)
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
ctx := context.WithValue(r.Context(), ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return

View File

@@ -1 +1,129 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package auth
import (
"testing"
)
func TestHasValidRole(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user"}}
exists, _ := u.HasValidRole("user")
if !exists {
t.Fatalf(`User{Roles: ["user"]} -> HasValidRole("user"): EXISTS = %v, expected 'true'.`, exists)
}
}
func TestHasNotValidRole(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user"}}
exists, _ := u.HasValidRole("manager")
if exists {
t.Fatalf(`User{Roles: ["user"]} -> HasValidRole("manager"): EXISTS = %v, expected 'false'.`, exists)
}
}
func TestHasInvalidRole(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user"}}
_, valid := u.HasValidRole("invalid")
if valid {
t.Fatalf(`User{Roles: ["user"]} -> HasValidRole("invalid"): VALID = %v, expected 'false'.`, valid)
}
}
func TestHasNotInvalidRole(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user"}}
_, valid := u.HasValidRole("user")
if !valid {
t.Fatalf(`User{Roles: ["user"]} -> HasValidRole("user"): VALID = %v, expected 'true'.`, valid)
}
}
func TestHasRole(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user"}}
exists := u.HasRole(RoleUser)
if !exists {
t.Fatalf(`User{Roles: ["user"]} -> HasRole(RoleUser): EXISTS = %v, expected 'true'.`, exists)
}
}
func TestHasNotRole(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user"}}
exists := u.HasRole(RoleManager)
if exists {
t.Fatalf(`User{Roles: ["user"]} -> HasRole(RoleManager): EXISTS = %v, expected 'false'.`, exists)
}
}
func TestHasAnyRole(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
result := u.HasAnyRole([]Role{RoleManager, RoleSupport, RoleAdmin})
if !result {
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasAnyRole([]Role{RoleManager, RoleSupport, RoleAdmin}): RESULT = %v, expected 'true'.`, result)
}
}
func TestHasNotAnyRole(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
result := u.HasAnyRole([]Role{RoleSupport, RoleAdmin})
if result {
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasAllRoles([]Role{RoleSupport, RoleAdmin}): RESULT = %v, expected 'false'.`, result)
}
}
func TestHasAllRoles(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user", "manager", "support"}}
result := u.HasAllRoles([]Role{RoleUser, RoleManager, RoleSupport})
if !result {
t.Fatalf(`User{Roles: ["user", "manager", "support"]} -> HasAllRoles([]Role{RoleUser, RoleManager, RoleSupport}): RESULT = %v, expected 'true'.`, result)
}
}
func TestHasNotAllRoles(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
result := u.HasAllRoles([]Role{RoleUser, RoleManager, RoleSupport})
if result {
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasAllRoles([]Role{RoleUser, RoleManager, RoleSupport}): RESULT = %v, expected 'false'.`, result)
}
}
func TestHasNotRoles(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
result := u.HasNotRoles([]Role{RoleSupport, RoleAdmin})
if !result {
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasNotRoles([]Role{RoleSupport, RoleAdmin}): RESULT = %v, expected 'true'.`, result)
}
}
func TestHasAllNotRoles(t *testing.T) {
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
result := u.HasNotRoles([]Role{RoleUser, RoleManager})
if result {
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasNotRoles([]Role{RoleUser, RoleManager}): RESULT = %v, expected 'false'.`, result)
}
}
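Taken together, these tests pin down the contract of HasValidRole: it returns a pair (exists, valid), where valid reports whether the queried string names a role known to the system at all, and exists reports whether the user actually holds it. A minimal sketch consistent with the tests, assuming a package-level list of known role names (the real implementation may differ):

// Hypothetical sketch only; knownRoles is an assumption, not the
// repository's actual role registry.
var knownRoles = []string{"user", "manager", "support", "admin", "api"}

// HasValidRole reports whether the user holds the queried role (exists)
// and whether the queried string is a known role name at all (valid).
func (u *User) HasValidRole(role string) (exists bool, valid bool) {
	for _, known := range knownRoles {
		if known == role {
			valid = true
			break
		}
	}
	if !valid {
		return false, false
	}
	for _, held := range u.Roles {
		if held == role {
			exists = true
			break
		}
	}
	return exists, true
}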


@@ -23,8 +23,9 @@ import (
type JWTAuthenticator struct {
auth *Authentication
publicKey ed25519.PublicKey
privateKey ed25519.PrivateKey
publicKey ed25519.PublicKey
privateKey ed25519.PrivateKey
publicKeyCrossLogin ed25519.PublicKey // For accepting externally generated JWTs
loginTokenKey []byte // HS256/HS512 key for login tokens
@@ -44,11 +45,13 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
} else {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
log.Warn("Could not decode JWT public key")
return err
}
ja.publicKey = ed25519.PublicKey(bytes)
bytes, err = base64.StdEncoding.DecodeString(privKey)
if err != nil {
log.Warn("Could not decode JWT private key")
return err
}
ja.privateKey = ed25519.PrivateKey(bytes)
@@ -57,11 +60,41 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
if pubKey = os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
log.Warn("Could not decode cross login JWT HS512 key")
return err
}
ja.loginTokenKey = bytes
}
// Look for external public keys
pubKeyCrossLogin, keyFound := os.LookupEnv("CROSS_LOGIN_JWT_PUBLIC_KEY")
if keyFound && pubKeyCrossLogin != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
if err != nil {
log.Warn("Could not decode cross login JWT public key")
return err
}
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
// Warn if other necessary settings are not configured
if ja.config != nil {
if ja.config.CookieName == "" {
log.Warn("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
}
if !ja.config.ForceJWTValidationViaDatabase {
log.Warn("forceJWTValidationViaDatabase not set to true: CC will accept users and roles defined in JWTs regardless of its own database!")
}
if ja.config.TrustedExternalIssuer == "" {
log.Warn("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
}
} else {
log.Warn("cookieName and trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
}
} else {
ja.publicKeyCrossLogin = nil
log.Warn("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
}
return nil
}
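The Ed25519 keys decoded above, including CROSS_LOGIN_JWT_PUBLIC_KEY, are expected as base64-encoded raw key bytes. A throwaway generator like the following sketch (not part of the repository) produces values in that encoding:

package main

import (
	"crypto/ed25519"
	"crypto/rand"
	"encoding/base64"
	"fmt"
)

func main() {
	pub, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		panic(err)
	}
	// Base64 of the raw key bytes, matching the decoding code above.
	fmt.Println("public: ", base64.StdEncoding.EncodeToString(pub))
	fmt.Println("private:", base64.StdEncoding.EncodeToString(priv))
}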
@@ -94,13 +127,15 @@ func (ja *JWTAuthenticator) Login(
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
return ja.loginTokenKey, nil
}
return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
return nil, fmt.Errorf("AUTH/JWT > unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
})
if err != nil {
log.Warn("Error while parsing jwt token")
return nil, err
}
if err := token.Claims.Valid(); err != nil {
log.Warn("jwt token claims are not valid")
return nil, err
}
@@ -111,17 +146,22 @@ func (ja *JWTAuthenticator) Login(
if rawroles, ok := claims["roles"].([]interface{}); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
roles = append(roles, r)
if isValidRole(r) {
roles = append(roles, r)
}
}
}
}
if rawrole, ok := claims["roles"].(string); ok {
roles = append(roles, rawrole)
if isValidRole(rawrole) {
roles = append(roles, rawrole)
}
}
if user == nil {
user, err = ja.auth.GetUser(sub)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%v'", sub)
return nil, err
} else if user == nil {
user = &User{
@@ -130,6 +170,7 @@ func (ja *JWTAuthenticator) Login(
AuthSource: AuthViaToken,
}
if err := ja.auth.AddUser(user); err != nil {
log.Errorf("Error while adding user '%v' to auth from token", user.Username)
return nil, err
}
}
@@ -149,38 +190,122 @@ func (ja *JWTAuthenticator) Auth(
rawtoken = strings.TrimPrefix(rawtoken, "Bearer ")
}
// If no auth header was found, check for a certain cookie containing a JWT
cookieName := ""
cookieFound := false
if ja.config != nil && ja.config.CookieName != "" {
cookieName = ja.config.CookieName
}
// Try to read the JWT cookie
if rawtoken == "" && cookieName != "" {
jwtCookie, err := r.Cookie(cookieName)
if err == nil && jwtCookie.Value != "" {
rawtoken = jwtCookie.Value
cookieFound = true
}
}
// No JWT found: the user may still be logged in via a regular
// session, so fall back to session-based authentication:
if rawtoken == "" {
return ja.auth.AuthViaSession(rw, r)
}
// Try to parse JWT
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
if t.Method != jwt.SigningMethodEdDSA {
return nil, errors.New("only Ed25519/EdDSA supported")
}
// Is there more than one public key?
if ja.publicKeyCrossLogin != nil && ja.config != nil && ja.config.TrustedExternalIssuer != "" {
// Determine whether to use the external public key
unvalidatedIssuer, success := t.Claims.(jwt.MapClaims)["iss"].(string)
if success && unvalidatedIssuer == ja.config.TrustedExternalIssuer {
// The (unvalidated) issuer seems to be the expected one,
// use public cross login key from config
return ja.publicKeyCrossLogin, nil
}
}
// No cross login key configured or issuer not expected
// Try own key
return ja.publicKey, nil
})
if err != nil {
log.Warn("Error while parsing token")
return nil, err
}
// Check token validity
if err := token.Claims.Valid(); err != nil {
log.Warn("jwt token claims are not valid")
return nil, err
}
// Token is valid, extract payload
claims := token.Claims.(jwt.MapClaims)
sub, _ := claims["sub"].(string)
var roles []string
if rawroles, ok := claims["roles"].([]interface{}); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
roles = append(roles, r)
// Validate user + roles from JWT against database?
if ja.config != nil && ja.config.ForceJWTValidationViaDatabase {
user, err := ja.auth.GetUser(sub)
// Deny any logins for unknown usernames
if err != nil {
log.Warn("Could not find user from JWT in internal database.")
return nil, errors.New("unknown user")
}
// Take user roles from database instead of trusting the JWT
roles = user.Roles
} else {
// Extract roles from JWT (if present)
if rawroles, ok := claims["roles"].([]interface{}); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
roles = append(roles, r)
}
}
}
}
if cookieFound {
// Create a session so that the JWT cookie is no longer needed
session, err := ja.auth.sessionStore.New(r, "session")
if err != nil {
log.Errorf("session creation failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return nil, err
}
if ja.auth.SessionMaxAge != 0 {
session.Options.MaxAge = int(ja.auth.SessionMaxAge.Seconds())
}
session.Values["username"] = sub
session.Values["roles"] = roles
if err := ja.auth.sessionStore.Save(r, rw, session); err != nil {
log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return nil, err
}
// (Ask browser to) Delete JWT cookie
deletedCookie := &http.Cookie{
Name: cookieName,
Value: "",
Path: "/",
MaxAge: -1,
HttpOnly: true,
}
http.SetCookie(rw, deletedCookie)
}
return &User{
Username: sub,
Roles: roles,


@@ -39,21 +39,23 @@ func (la *LdapAuthenticator) Init(
if la.config != nil && la.config.SyncInterval != "" {
interval, err := time.ParseDuration(la.config.SyncInterval)
if err != nil {
log.Warnf("Could not parse duration for sync interval: %v", la.config.SyncInterval)
return err
}
if interval == 0 {
log.Info("Sync interval is zero")
return nil
}
go func() {
ticker := time.NewTicker(interval)
for t := range ticker.C {
log.Printf("LDAP sync started at %s", t.Format(time.RFC3339))
log.Printf("sync started at %s", t.Format(time.RFC3339))
if err := la.Sync(); err != nil {
log.Errorf("LDAP sync failed: %s", err.Error())
log.Errorf("sync failed: %s", err.Error())
}
log.Print("LDAP sync done")
log.Print("sync done")
}
}()
}
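Since la.config.SyncInterval is parsed with time.ParseDuration, any Go duration string such as "24h" or "30m" is a valid setting, and a zero duration disables the background sync. A trivial check of an assumed value:

package main

import (
	"fmt"
	"time"
)

func main() {
	// "24h" is a placeholder for the configured LDAP sync interval.
	interval, err := time.ParseDuration("24h")
	if err != nil {
		panic(err)
	}
	fmt.Println(interval) // 24h0m0s
}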
@@ -76,12 +78,14 @@ func (la *LdapAuthenticator) Login(
l, err := la.getLdapConnection(false)
if err != nil {
log.Warn("Error while getting ldap connection")
return nil, err
}
defer l.Close()
userDn := strings.Replace(la.config.UserBind, "{username}", user.Username, -1)
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
log.Error("Error while binding to ldap connection")
return nil, err
}
@@ -104,12 +108,14 @@ func (la *LdapAuthenticator) Sync() error {
users := map[string]int{}
rows, err := la.auth.db.Query(`SELECT username FROM user WHERE user.ldap = 1`)
if err != nil {
log.Warn("Error while querying LDAP users")
return err
}
for rows.Next() {
var username string
if err := rows.Scan(&username); err != nil {
log.Warnf("Error while scanning for user '%s'", username)
return err
}
@@ -118,6 +124,7 @@ func (la *LdapAuthenticator) Sync() error {
l, err := la.getLdapConnection(true)
if err != nil {
log.Error("LDAP connection error")
return err
}
defer l.Close()
@@ -126,6 +133,7 @@ func (la *LdapAuthenticator) Sync() error {
la.config.UserBase, ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
la.config.UserFilter, []string{"dn", "uid", "gecos"}, nil))
if err != nil {
log.Warn("LDAP search error")
return err
}
@@ -147,15 +155,17 @@ func (la *LdapAuthenticator) Sync() error {
for username, where := range users {
if where == IN_DB && la.config.SyncDelOldUsers {
log.Debugf("ldap-sync: remove %#v (does not show up in LDAP anymore)", username)
log.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
if _, err := la.auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username); err != nil {
log.Errorf("User '%s' not in LDAP anymore: Delete from DB failed", username)
return err
}
} else if where == IN_LDAP {
name := newnames[username]
log.Debugf("ldap-sync: add %#v (name: %#v, roles: [user], ldap: true)", username, name)
log.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
if _, err := la.auth.db.Exec(`INSERT INTO user (username, ldap, name, roles) VALUES (?, ?, ?, ?)`,
username, 1, name, "[\""+RoleUser+"\"]"); err != nil {
username, 1, name, "[\""+GetRoleString(RoleUser)+"\"]"); err != nil {
log.Errorf("User '%s' new in LDAP: Insert into DB failed", username)
return err
}
}
@@ -170,12 +180,14 @@ func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
conn, err := ldap.DialURL(la.config.Url)
if err != nil {
log.Warn("LDAP URL dial failed")
return nil, err
}
if admin {
if err := conn.Bind(la.config.SearchDN, la.syncPassword); err != nil {
conn.Close()
log.Warn("LDAP connection bind failed")
return nil, err
}
}


@@ -39,7 +39,7 @@ func (la *LocalAuthenticator) Login(
r *http.Request) (*User, error) {
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil {
return nil, fmt.Errorf("user '%s' provided the wrong password (%w)", user.Username, e)
return nil, fmt.Errorf("AUTH/LOCAL > user '%s' provided the wrong password (%w)", user.Username, e)
}
return user, nil
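For local accounts the stored password is a bcrypt hash; AddUser (further below) generates it with bcrypt.GenerateFromPassword. A short sketch of producing and verifying such a hash with the golang.org/x/crypto/bcrypt API ("secret" is a placeholder password):

package main

import (
	"fmt"

	"golang.org/x/crypto/bcrypt"
)

func main() {
	hash, err := bcrypt.GenerateFromPassword([]byte("secret"), bcrypt.DefaultCost)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(hash)) // e.g. $2a$10$...
	// CompareHashAndPassword returns nil when the password matches.
	fmt.Println(bcrypt.CompareHashAndPassword(hash, []byte("secret"))) // <nil>
}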


@@ -10,6 +10,7 @@ import (
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log"
@@ -21,10 +22,11 @@ import (
func (auth *Authentication) GetUser(username string) (*User, error) {
user := &User{Username: username}
var hashedPassword, name, rawRoles, email sql.NullString
if err := sq.Select("password", "ldap", "name", "roles", "email").From("user").
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
Where("user.username = ?", username).RunWith(auth.db).
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email); err != nil {
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
log.Warnf("Error while querying user '%v' from database", username)
return nil, err
}
@@ -33,6 +35,12 @@ func (auth *Authentication) GetUser(username string) (*User, error) {
user.Email = email.String
if rawRoles.Valid {
if err := json.Unmarshal([]byte(rawRoles.String), &user.Roles); err != nil {
log.Warn("Error while unmarshaling raw roles from DB")
return nil, err
}
}
if rawProjects.Valid {
if err := json.Unmarshal([]byte(rawProjects.String), &user.Projects); err != nil {
return nil, err
}
}
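Both roles and the new projects column hold plain JSON string arrays, so a cell is expected to look like ["projA","projB"]. A minimal round-trip under that assumption:

package main

import (
	"encoding/json"
	"fmt"
	"log"
)

func main() {
	// Assumed storage format of the `projects` (and `roles`) column.
	raw := `["projA","projB"]`
	var projects []string
	if err := json.Unmarshal([]byte(raw), &projects); err != nil {
		log.Fatal(err)
	}
	fmt.Println(projects) // [projA projB]
}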
@@ -43,9 +51,11 @@ func (auth *Authentication) GetUser(username string) (*User, error) {
func (auth *Authentication) AddUser(user *User) error {
rolesJson, _ := json.Marshal(user.Roles)
projectsJson, _ := json.Marshal(user.Projects)
cols := []string{"username", "roles", "projects"}
vals := []interface{}{user.Username, string(rolesJson), string(projectsJson)}
cols := []string{"username", "roles"}
vals := []interface{}{user.Username, string(rolesJson)}
if user.Name != "" {
cols = append(cols, "name")
vals = append(vals, user.Name)
@@ -57,6 +67,7 @@ func (auth *Authentication) AddUser(user *User) error {
if user.Password != "" {
password, err := bcrypt.GenerateFromPassword([]byte(user.Password), bcrypt.DefaultCost)
if err != nil {
log.Error("Error while encrypting new user password")
return err
}
cols = append(cols, "password")
@@ -64,28 +75,31 @@ func (auth *Authentication) AddUser(user *User) error {
}
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
return err
}
log.Infof("new user %#v created (roles: %s, auth-source: %d)", user.Username, rolesJson, user.AuthSource)
log.Infof("new user %#v created (roles: %s, auth-source: %d, projects: %s)", user.Username, rolesJson, user.AuthSource, projectsJson)
return nil
}
func (auth *Authentication) DelUser(username string) error {
_, err := auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username)
if err != nil {
log.Errorf("Error while deleting user '%s' from DB", username)
}
return err
}
func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
q := sq.Select("username", "name", "email", "roles").From("user")
q := sq.Select("username", "name", "email", "roles", "projects").From("user")
if specialsOnly {
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
}
rows, err := q.RunWith(auth.db).Query()
if err != nil {
log.Warn("Error while querying user list")
return nil, err
}
@@ -93,13 +107,20 @@ func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
defer rows.Close()
for rows.Next() {
rawroles := ""
rawprojects := ""
user := &User{}
var name, email sql.NullString
if err := rows.Scan(&user.Username, &name, &email, &rawroles); err != nil {
if err := rows.Scan(&user.Username, &name, &email, &rawroles, &rawprojects); err != nil {
log.Warn("Error while scanning user list")
return nil, err
}
if err := json.Unmarshal([]byte(rawroles), &user.Roles); err != nil {
log.Warn("Error while unmarshaling raw role list")
return nil, err
}
if err := json.Unmarshal([]byte(rawprojects), &user.Projects); err != nil {
return nil, err
}
@@ -113,64 +134,137 @@ func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
func (auth *Authentication) AddRole(
ctx context.Context,
username string,
role string) error {
queryrole string) error {
newRole := strings.ToLower(queryrole)
user, err := auth.GetUser(username)
if err != nil {
log.Warnf("Could not load user '%s'", username)
return err
}
if role != RoleAdmin && role != RoleApi && role != RoleUser && role != RoleSupport {
return fmt.Errorf("invalid user role: %#v", role)
exists, valid := user.HasValidRole(newRole)
if !valid {
return fmt.Errorf("Supplied role is no valid option : %v", newRole)
}
if exists {
return fmt.Errorf("User %v already has role %v", username, newRole)
}
for _, r := range user.Roles {
if r == role {
return fmt.Errorf("user %#v already has role %#v", username, role)
}
}
roles, _ := json.Marshal(append(user.Roles, role))
roles, _ := json.Marshal(append(user.Roles, newRole))
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while adding new role for user '%s'", user.Username)
return err
}
return nil
}
func (auth *Authentication) RemoveRole(ctx context.Context, username string, role string) error {
func (auth *Authentication) RemoveRole(ctx context.Context, username string, queryrole string) error {
oldRole := strings.ToLower(queryrole)
user, err := auth.GetUser(username)
if err != nil {
log.Warnf("Could not load user '%s'", username)
return err
}
exists, valid := user.HasValidRole(oldRole)
if !valid {
return fmt.Errorf("Supplied role is no valid option : %v", oldRole)
}
if !exists {
return fmt.Errorf("Role already deleted for user '%v': %v", username, oldRole)
}
if oldRole == GetRoleString(RoleManager) && len(user.Projects) != 0 {
return fmt.Errorf("Cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
}
var newroles []string
for _, r := range user.Roles {
if r != oldRole {
newroles = append(newroles, r) // Keep all roles except the one requested for removal
}
}
var mroles, _ = json.Marshal(newroles)
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while removing role for user '%s'", user.Username)
return err
}
return nil
}
func (auth *Authentication) AddProject(
ctx context.Context,
username string,
project string) error {
user, err := auth.GetUser(username)
if err != nil {
return err
}
if role != RoleAdmin && role != RoleApi && role != RoleUser {
return fmt.Errorf("invalid user role: %#v", role)
if !user.HasRole(RoleManager) {
return fmt.Errorf("user '%s' is not a manager!", username)
}
if user.HasProject(project) {
return fmt.Errorf("user '%s' already manages project '%s'", username, project)
}
projects, _ := json.Marshal(append(user.Projects, project))
if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
return err
}
return nil
}
func (auth *Authentication) RemoveProject(ctx context.Context, username string, project string) error {
user, err := auth.GetUser(username)
if err != nil {
return err
}
if !user.HasRole(RoleManager) {
return fmt.Errorf("user '%#v' is not a manager!", username)
}
if !user.HasProject(project) {
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match!", username, project)
}
var exists bool
var newroles []string
for _, r := range user.Roles {
if r != role {
newroles = append(newroles, r) // Append all roles not matching requested delete role
var newprojects []string
for _, p := range user.Projects {
if p != project {
newprojects = append(newprojects, p) // Keep all projects except the one requested for removal
} else {
exists = true
}
}
if (exists == true) {
var mroles, _ = json.Marshal(newroles)
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
if exists {
var result interface{}
if len(newprojects) == 0 {
result = "[]"
} else {
result, _ = json.Marshal(newprojects)
}
if _, err := sq.Update("user").Set("projects", result).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
return err
}
return nil
} else {
return fmt.Errorf("user %#v already does not have role %#v", username, role)
return fmt.Errorf("user %s already does not manage project %s", username, project)
}
}
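Taken together, these functions enforce an ordering: a user must hold the manager role before projects can be assigned, and RemoveRole above refuses to strip the manager role while projects are still attached. A hypothetical helper illustrating the intended call order (the function name and arguments are placeholders, not repository code):

// Sketch only: promote a user to manager, then assign a project.
// Tearing down must happen in reverse: RemoveProject, then RemoveRole.
func promoteToManager(ctx context.Context, auth *Authentication, username, project string) error {
	if err := auth.AddRole(ctx, username, "manager"); err != nil {
		return err
	}
	return auth.AddProject(ctx, username, project)
}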
func FetchUser(ctx context.Context, db *sqlx.DB, username string) (*model.User, error) {
me := GetUser(ctx)
if me != nil && !me.HasRole(RoleAdmin) && !me.HasRole(RoleSupport) && me.Username != username {
if me != nil && me.Username != username && me.HasNotRoles([]Role{RoleAdmin, RoleSupport, RoleManager}) {
return nil, errors.New("forbidden")
}
@@ -179,9 +273,13 @@ func FetchUser(ctx context.Context, db *sqlx.DB, username string) (*model.User,
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
RunWith(db).QueryRow().Scan(&name, &email); err != nil {
if err == sql.ErrNoRows {
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */
/* since FetchUser will be called to retrieve full name and mail for every job in query/list */
// log.Warnf("User '%s' Not found in DB", username)
return nil, nil
}
log.Warnf("Error while fetching user '%s'", username)
return nil, err
}


@@ -25,6 +25,7 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
LdapConfig: nil,
SessionMaxAge: "168h",
StopJobsExceedingWalltime: 0,
ShortRunningJobsDuration: 5 * 60,
UiDefaults: map[string]interface{}{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
@@ -34,7 +35,6 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
"plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3,
"plot_list_hideShortRunningJobs": 5 * 60,
"plot_list_jobsPerPage": 50,
"plot_list_selectedMetrics": []string{"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"},
"plot_view_plotsPerRow": 3,
@@ -49,7 +49,7 @@ func Init(flagConfigFile string) {
raw, err := os.ReadFile(flagConfigFile)
if err != nil {
if !os.IsNotExist(err) {
log.Fatal(err)
log.Fatalf("CONFIG ERROR: %v", err)
}
} else {
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
@@ -58,7 +58,7 @@ func Init(flagConfigFile string) {
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
log.Fatal(err)
log.Fatalf("could not decode: %v", err)
}
if Keys.Clusters == nil || len(Keys.Clusters) < 1 {


@@ -19,7 +19,7 @@ func TestInit(t *testing.T) {
func TestInitMinimal(t *testing.T) {
fp := "../../docs/config.json"
Init(fp)
if Keys.Addr != "0.0.0.0:8080" {
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:8080", Keys.Addr)
if Keys.Addr != "127.0.0.1:8080" {
t.Errorf("wrong addr\ngot: %s \nwant: 127.0.0.1:8080", Keys.Addr)
}
}


@@ -89,6 +89,7 @@ type ComplexityRoot struct {
Exclusive func(childComplexity int) int
ID func(childComplexity int) int
JobID func(childComplexity int) int
JobName func(childComplexity int) int
MetaData func(childComplexity int) int
MonitoringStatus func(childComplexity int) int
NumAcc func(childComplexity int) int
@@ -131,6 +132,7 @@ type ComplexityRoot struct {
HistDuration func(childComplexity int) int
HistNumNodes func(childComplexity int) int
ID func(childComplexity int) int
Name func(childComplexity int) int
ShortJobs func(childComplexity int) int
TotalCoreHours func(childComplexity int) int
TotalJobs func(childComplexity int) int
@@ -274,6 +276,8 @@ type ClusterResolver interface {
Partitions(ctx context.Context, obj *schema.Cluster) ([]string, error)
}
type JobResolver interface {
JobName(ctx context.Context, obj *schema.Job) (*string, error)
Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)
MetaData(ctx context.Context, obj *schema.Job) (interface{}, error)
@@ -466,6 +470,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Job.JobID(childComplexity), true
case "Job.jobName":
if e.complexity.Job.JobName == nil {
break
}
return e.complexity.Job.JobName(childComplexity), true
case "Job.metaData":
if e.complexity.Job.MetaData == nil {
break
@@ -676,6 +687,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.JobsStatistics.ID(childComplexity), true
case "JobsStatistics.name":
if e.complexity.JobsStatistics.Name == nil {
break
}
return e.complexity.JobsStatistics.Name(childComplexity), true
case "JobsStatistics.shortJobs":
if e.complexity.JobsStatistics.ShortJobs == nil {
break
@@ -1433,6 +1451,7 @@ type Job {
jobId: Int!
user: String!
project: String!
jobName: String
cluster: String!
subCluster: String!
startTime: Time!
@@ -1635,6 +1654,7 @@ input JobFilter {
arrayJobId: Int
user: StringInput
project: StringInput
jobName: StringInput
cluster: StringInput
partition: StringInput
duration: IntRange
@@ -1668,6 +1688,7 @@ input StringInput {
contains: String
startsWith: String
endsWith: String
in: [String!]
}
input IntRange { from: Int!, to: Int! }
@@ -1688,6 +1709,7 @@ type HistoPoint {
type JobsStatistics {
id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster
name: String # if User-Statistics: Given Name of Account (ID) Owner
totalJobs: Int! # Number of jobs that matched
shortJobs: Int! # Number of jobs with a duration of less than ShortJobDuration (5 minutes)
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
@@ -3090,6 +3112,47 @@ func (ec *executionContext) fieldContext_Job_project(ctx context.Context, field
return fc, nil
}
func (ec *executionContext) _Job_jobName(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_jobName(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Job().JobName(rctx, obj)
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(*string)
fc.Result = res
return ec.marshalOString2ᚖstring(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Job_jobName(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Job",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type String does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _Job_cluster(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_cluster(ctx, field)
if err != nil {
@@ -4282,6 +4345,8 @@ func (ec *executionContext) fieldContext_JobResultList_items(ctx context.Context
return ec.fieldContext_Job_user(ctx, field)
case "project":
return ec.fieldContext_Job_project(ctx, field)
case "jobName":
return ec.fieldContext_Job_jobName(ctx, field)
case "cluster":
return ec.fieldContext_Job_cluster(ctx, field)
case "subCluster":
@@ -4492,6 +4557,47 @@ func (ec *executionContext) fieldContext_JobsStatistics_id(ctx context.Context,
return fc, nil
}
func (ec *executionContext) _JobsStatistics_name(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_JobsStatistics_name(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return obj.Name, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(*string)
fc.Result = res
return ec.marshalOString2ᚖstring(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_JobsStatistics_name(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "JobsStatistics",
Field: field,
IsMethod: false,
IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type String does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _JobsStatistics_totalJobs(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
if err != nil {
@@ -5557,7 +5663,6 @@ func (ec *executionContext) _Mutation_createTag(ctx context.Context, field graph
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -5620,7 +5725,6 @@ func (ec *executionContext) _Mutation_deleteTag(ctx context.Context, field graph
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -5675,7 +5779,6 @@ func (ec *executionContext) _Mutation_addTagsToJob(ctx context.Context, field gr
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -5738,7 +5841,6 @@ func (ec *executionContext) _Mutation_removeTagsFromJob(ctx context.Context, fie
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -5801,7 +5903,6 @@ func (ec *executionContext) _Mutation_updateConfiguration(ctx context.Context, f
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
@@ -5993,7 +6094,6 @@ func (ec *executionContext) _Query_clusters(ctx context.Context, field graphql.C
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -6047,7 +6147,6 @@ func (ec *executionContext) _Query_tags(ctx context.Context, field graphql.Colle
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -6099,7 +6198,6 @@ func (ec *executionContext) _Query_user(ctx context.Context, field graphql.Colle
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
@@ -6159,7 +6257,6 @@ func (ec *executionContext) _Query_allocatedNodes(ctx context.Context, field gra
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -6220,7 +6317,6 @@ func (ec *executionContext) _Query_job(ctx context.Context, field graphql.Collec
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
@@ -6246,6 +6342,8 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
return ec.fieldContext_Job_user(ctx, field)
case "project":
return ec.fieldContext_Job_project(ctx, field)
case "jobName":
return ec.fieldContext_Job_jobName(ctx, field)
case "cluster":
return ec.fieldContext_Job_cluster(ctx, field)
case "subCluster":
@@ -6318,7 +6416,6 @@ func (ec *executionContext) _Query_jobMetrics(ctx context.Context, field graphql
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -6381,7 +6478,6 @@ func (ec *executionContext) _Query_jobsFootprints(ctx context.Context, field gra
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
@@ -6439,7 +6535,6 @@ func (ec *executionContext) _Query_jobs(ctx context.Context, field graphql.Colle
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -6504,7 +6599,6 @@ func (ec *executionContext) _Query_jobsStatistics(ctx context.Context, field gra
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -6527,6 +6621,8 @@ func (ec *executionContext) fieldContext_Query_jobsStatistics(ctx context.Contex
switch field.Name {
case "id":
return ec.fieldContext_JobsStatistics_id(ctx, field)
case "name":
return ec.fieldContext_JobsStatistics_name(ctx, field)
case "totalJobs":
return ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
case "shortJobs":
@@ -6575,7 +6671,6 @@ func (ec *executionContext) _Query_jobsCount(ctx context.Context, field graphql.
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -6636,7 +6731,6 @@ func (ec *executionContext) _Query_rooflineHeatmap(ctx context.Context, field gr
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -6691,7 +6785,6 @@ func (ec *executionContext) _Query_nodeMetrics(ctx context.Context, field graphq
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
@@ -6754,7 +6847,6 @@ func (ec *executionContext) _Query___type(ctx context.Context, field graphql.Col
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
@@ -6828,7 +6920,6 @@ func (ec *executionContext) _Query___schema(ctx context.Context, field graphql.C
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
@@ -10650,7 +10741,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj int
asMap[k] = v
}
fieldsInOrder := [...]string{"tags", "jobId", "arrayJobId", "user", "project", "cluster", "partition", "duration", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "flopsAnyAvg", "memBwAvg", "loadAvg", "memUsedMax"}
fieldsInOrder := [...]string{"tags", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "flopsAnyAvg", "memBwAvg", "loadAvg", "memUsedMax"}
for _, k := range fieldsInOrder {
v, ok := asMap[k]
if !ok {
@@ -10697,6 +10788,14 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj int
if err != nil {
return it, err
}
case "jobName":
var err error
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("jobName"))
it.JobName, err = ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
if err != nil {
return it, err
}
case "cluster":
var err error
@@ -10890,7 +10989,7 @@ func (ec *executionContext) unmarshalInputStringInput(ctx context.Context, obj i
asMap[k] = v
}
fieldsInOrder := [...]string{"eq", "contains", "startsWith", "endsWith"}
fieldsInOrder := [...]string{"eq", "contains", "startsWith", "endsWith", "in"}
for _, k := range fieldsInOrder {
v, ok := asMap[k]
if !ok {
@@ -10929,6 +11028,14 @@ func (ec *executionContext) unmarshalInputStringInput(ctx context.Context, obj i
if err != nil {
return it, err
}
case "in":
var err error
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("in"))
it.In, err = ec.unmarshalOString2ᚕstringᚄ(ctx, v)
if err != nil {
return it, err
}
}
}
@@ -11261,6 +11368,23 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
if out.Values[i] == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
case "jobName":
field := field
innerFunc := func(ctx context.Context) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Job_jobName(ctx, field, obj)
return res
}
out.Concurrently(i, func() graphql.Marshaler {
return innerFunc(ctx)
})
case "cluster":
out.Values[i] = ec._Job_cluster(ctx, field, obj)
@@ -11570,6 +11694,10 @@ func (ec *executionContext) _JobsStatistics(ctx context.Context, sel ast.Selecti
if out.Values[i] == graphql.Null {
invalids++
}
case "name":
out.Values[i] = ec._JobsStatistics_name(ctx, field, obj)
case "totalJobs":
out.Values[i] = ec._JobsStatistics_totalJobs(ctx, field, obj)
@@ -11832,7 +11960,6 @@ func (ec *executionContext) _Mutation(ctx context.Context, sel ast.SelectionSet)
})
out := graphql.NewFieldSet(fields)
var invalids uint32
for i, field := range fields {
innerCtx := graphql.WithRootFieldContext(ctx, &graphql.RootFieldContext{
Object: field.Name,
@@ -11848,36 +11975,24 @@ func (ec *executionContext) _Mutation(ctx context.Context, sel ast.SelectionSet)
return ec._Mutation_createTag(ctx, field)
})
if out.Values[i] == graphql.Null {
invalids++
}
case "deleteTag":
out.Values[i] = ec.OperationContext.RootResolverMiddleware(innerCtx, func(ctx context.Context) (res graphql.Marshaler) {
return ec._Mutation_deleteTag(ctx, field)
})
if out.Values[i] == graphql.Null {
invalids++
}
case "addTagsToJob":
out.Values[i] = ec.OperationContext.RootResolverMiddleware(innerCtx, func(ctx context.Context) (res graphql.Marshaler) {
return ec._Mutation_addTagsToJob(ctx, field)
})
if out.Values[i] == graphql.Null {
invalids++
}
case "removeTagsFromJob":
out.Values[i] = ec.OperationContext.RootResolverMiddleware(innerCtx, func(ctx context.Context) (res graphql.Marshaler) {
return ec._Mutation_removeTagsFromJob(ctx, field)
})
if out.Values[i] == graphql.Null {
invalids++
}
case "updateConfiguration":
out.Values[i] = ec.OperationContext.RootResolverMiddleware(innerCtx, func(ctx context.Context) (res graphql.Marshaler) {
@@ -11889,9 +12004,6 @@ func (ec *executionContext) _Mutation(ctx context.Context, sel ast.SelectionSet)
}
}
out.Dispatch()
if invalids > 0 {
return graphql.Null
}
return out
}
@@ -11946,7 +12058,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
})
out := graphql.NewFieldSet(fields)
var invalids uint32
for i, field := range fields {
innerCtx := graphql.WithRootFieldContext(ctx, &graphql.RootFieldContext{
Object: field.Name,
@@ -11966,9 +12077,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}()
res = ec._Query_clusters(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
return res
}
@@ -11989,9 +12097,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}()
res = ec._Query_tags(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
return res
}
@@ -12032,9 +12137,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}()
res = ec._Query_allocatedNodes(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
return res
}
@@ -12075,9 +12177,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}()
res = ec._Query_jobMetrics(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
return res
}
@@ -12118,9 +12217,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}()
res = ec._Query_jobs(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
return res
}
@@ -12141,9 +12237,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}()
res = ec._Query_jobsStatistics(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
return res
}
@@ -12164,9 +12257,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}()
res = ec._Query_jobsCount(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
return res
}
@@ -12187,9 +12277,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}()
res = ec._Query_rooflineHeatmap(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
return res
}
@@ -12210,9 +12297,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}()
res = ec._Query_nodeMetrics(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&invalids, 1)
}
return res
}
@@ -12240,9 +12324,6 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
}
out.Dispatch()
if invalids > 0 {
return graphql.Null
}
return out
}


@@ -42,6 +42,7 @@ type JobFilter struct {
ArrayJobID *int `json:"arrayJobId"`
User *StringInput `json:"user"`
Project *StringInput `json:"project"`
JobName *StringInput `json:"jobName"`
Cluster *StringInput `json:"cluster"`
Partition *StringInput `json:"partition"`
Duration *schema.IntRange `json:"duration"`
@@ -72,6 +73,7 @@ type JobResultList struct {
type JobsStatistics struct {
ID string `json:"id"`
Name *string `json:"name"`
TotalJobs int `json:"totalJobs"`
ShortJobs int `json:"shortJobs"`
TotalWalltime int `json:"totalWalltime"`
@@ -102,10 +104,11 @@ type PageRequest struct {
}
type StringInput struct {
Eq *string `json:"eq"`
Contains *string `json:"contains"`
StartsWith *string `json:"startsWith"`
EndsWith *string `json:"endsWith"`
Eq *string `json:"eq"`
Contains *string `json:"contains"`
StartsWith *string `json:"startsWith"`
EndsWith *string `json:"endsWith"`
In []string `json:"in"`
}
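The new In field presumably maps to a SQL IN clause inside the repository's BuildWhereClause. With squirrel, which this codebase already uses, passing a slice to sq.Eq renders exactly that; a hedged sketch (job.job_name is an assumed column name):

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	// sq.Eq with a slice value renders an IN clause.
	query := sq.Select("*").From("job").
		Where(sq.Eq{"job.job_name": []string{"lammps", "gromacs"}})
	sql, args, _ := query.ToSql()
	fmt.Println(sql)  // SELECT * FROM job WHERE job.job_name IN (?,?)
	fmt.Println(args) // [lammps gromacs]
}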
type TimeRangeOutput struct {


@@ -2,6 +2,7 @@ package graph
// This file will be automatically regenerated based on the schema, any resolver implementations
// will be copied through when generating and any unknown code will be moved to the end.
// Code generated by github.com/99designs/gqlgen version v0.17.24
import (
"context"
@@ -16,6 +17,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@@ -24,6 +26,11 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
return r.Repo.Partitions(obj.Name)
}
// JobName is the resolver for the jobName field.
func (r *jobResolver) JobName(ctx context.Context, obj *schema.Job) (*string, error) {
return r.Repo.FetchJobName(obj)
}
// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(&obj.ID)
@@ -43,6 +50,7 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name)
if err != nil {
log.Warn("Error while creating tag")
return nil, err
}
@@ -58,6 +66,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while adding tag to job")
return nil, err
}
@@ -65,10 +74,12 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
for _, tagId := range tagIds {
tid, err := strconv.ParseInt(tagId, 10, 64)
if err != nil {
log.Warn("Error while parsing tag id")
return nil, err
}
if tags, err = r.Repo.AddTag(jid, tid); err != nil {
log.Warn("Error while adding tag")
return nil, err
}
}
@@ -80,6 +91,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while parsing job id")
return nil, err
}
@@ -87,10 +99,12 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
for _, tagId := range tagIds {
tid, err := strconv.ParseInt(tagId, 10, 64)
if err != nil {
log.Warn("Error while parsing tag id")
return nil, err
}
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
log.Warn("Error while removing tag")
return nil, err
}
}
@@ -101,6 +115,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
// UpdateConfiguration is the resolver for the updateConfiguration field.
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, auth.GetUser(ctx)); err != nil {
log.Warn("Error while updating user config")
return nil, err
}
@@ -126,6 +141,7 @@ func (r *queryResolver) User(ctx context.Context, username string) (*model.User,
func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) {
data, err := r.Repo.AllocatedNodes(cluster)
if err != nil {
log.Warn("Error while fetching allocated nodes")
return nil, err
}
@@ -144,15 +160,17 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
numericId, err := strconv.ParseInt(id, 10, 64)
if err != nil {
log.Warn("Error while parsing job id")
return nil, err
}
job, err := r.Repo.FindById(numericId)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
if user := auth.GetUser(ctx); user != nil && !user.HasRole(auth.RoleAdmin) && !user.HasRole(auth.RoleSupport) && job.User != user.Username {
if user := auth.GetUser(ctx); user != nil && job.User != user.Username && user.HasNotRoles([]auth.Role{auth.RoleAdmin, auth.RoleSupport, auth.RoleManager}) {
return nil, errors.New("you are not allowed to see this job")
}
@@ -163,17 +181,23 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
job, err := r.Query().Job(ctx, id)
if err != nil {
log.Warn("Error while querying job for metrics")
return nil, err
}
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
if err != nil {
log.Warn("Error while loading job data")
return nil, err
}
res := []*model.JobMetricWithName{}
for name, md := range data {
for scope, metric := range md {
if metric.Scope != schema.MetricScope(scope) {
log.Panic("metric.Scope != schema.MetricScope(scope) : Should not happen!")
}
res = append(res, &model.JobMetricWithName{
Name: name,
Scope: scope,
@@ -201,11 +225,13 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
jobs, err := r.Repo.QueryJobs(ctx, filter, page, order)
if err != nil {
log.Warn("Error while querying jobs")
return nil, err
}
count, err := r.Repo.CountJobs(ctx, filter)
if err != nil {
log.Warn("Error while counting jobs")
return nil, err
}
@@ -214,13 +240,14 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
// JobsStatistics is the resolver for the jobsStatistics field.
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
return r.jobsStatistics(ctx, filter, groupBy)
return r.Repo.JobsStatistics(ctx, filter, groupBy)
}
// JobsCount is the resolver for the jobsCount field.
func (r *queryResolver) JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, weight *model.Weights, limit *int) ([]*model.Count, error) {
counts, err := r.Repo.CountGroupedJobs(ctx, groupBy, filter, weight, limit)
if err != nil {
log.Warn("Error while counting grouped jobs")
return nil, err
}
@@ -254,6 +281,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
log.Warn("Error while loading node data")
return nil, err
}


@@ -6,197 +6,16 @@ package graph
import (
"context"
"database/sql"
"errors"
"fmt"
"math"
"time"
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
// GraphQL validation should make sure that no unknown values can be specified.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
}
const ShortJobDuration int = 5 * 60
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
// In case `groupBy` is nil (not used), the model.JobsStatistics used is at the key '' (empty string)
stats := map[string]*model.JobsStatistics{}
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
for _, cluster := range archive.Clusters {
for _, subcluster := range cluster.SubClusters {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", subcluster.SocketsPerNode, subcluster.CoresPerSocket)
var query sq.SelectBuilder
if groupBy == nil {
query = sq.Select(
"''",
"COUNT(job.id)",
"CAST(ROUND(SUM(job.duration) / 3600) as int)",
corehoursCol,
).From("job")
} else {
col := groupBy2column[*groupBy]
query = sq.Select(
col,
"COUNT(job.id)",
"CAST(ROUND(SUM(job.duration) / 3600) as int)",
corehoursCol,
).From("job").GroupBy(col)
}
query = query.
Where("job.cluster = ?", cluster.Name).
Where("job.subcluster = ?", subcluster.Name)
query = repository.SecurityCheck(ctx, query)
for _, f := range filter {
query = repository.BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
return nil, err
}
for rows.Next() {
var id sql.NullString
var jobs, walltime, corehours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
return nil, err
}
if id.Valid {
if s, ok := stats[id.String]; ok {
s.TotalJobs += int(jobs.Int64)
s.TotalWalltime += int(walltime.Int64)
s.TotalCoreHours += int(corehours.Int64)
} else {
stats[id.String] = &model.JobsStatistics{
ID: id.String,
TotalJobs: int(jobs.Int64),
TotalWalltime: int(walltime.Int64),
TotalCoreHours: int(corehours.Int64),
}
}
}
}
}
}
if groupBy == nil {
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
query = repository.SecurityCheck(ctx, query)
for _, f := range filter {
query = repository.BuildWhereClause(f, query)
}
if err := query.RunWith(r.DB).QueryRow().Scan(&(stats[""].ShortJobs)); err != nil {
return nil, err
}
} else {
col := groupBy2column[*groupBy]
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
query = repository.SecurityCheck(ctx, query)
for _, f := range filter {
query = repository.BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
return nil, err
}
for rows.Next() {
var id sql.NullString
var shortJobs sql.NullInt64
if err := rows.Scan(&id, &shortJobs); err != nil {
return nil, err
}
if id.Valid {
stats[id.String].ShortJobs = int(shortJobs.Int64)
}
}
}
// Calculating the histogram data is expensive, so only do it if needed.
// An explicit resolver can not be used because we need to know the filters.
histogramsNeeded := false
fields := graphql.CollectFieldsCtx(ctx, nil)
for _, col := range fields {
if col.Name == "histDuration" || col.Name == "histNumNodes" {
histogramsNeeded = true
}
}
res := make([]*model.JobsStatistics, 0, len(stats))
for _, stat := range stats {
res = append(res, stat)
id, col := "", ""
if groupBy != nil {
id = stat.ID
col = groupBy2column[*groupBy]
}
if histogramsNeeded {
var err error
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as int) as value`, time.Now().Unix())
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter, id, col)
if err != nil {
return nil, err
}
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
if err != nil {
return nil, err
}
}
}
return res, nil
}
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
// to add a condition to the query of the kind "<col> = <id>".
func (r *queryResolver) jobsStatisticsHistogram(ctx context.Context, value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
query := sq.Select(value, "COUNT(job.id) AS count").From("job")
query = repository.SecurityCheck(ctx, query)
for _, f := range filters {
query = repository.BuildWhereClause(f, query)
}
if len(id) != 0 && len(col) != 0 {
query = query.Where(col+" = ?", id)
}
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
if err != nil {
return nil, err
}
points := make([]*model.HistoPoint, 0)
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
return nil, err
}
points = append(points, &point)
}
return points, nil
}
const MAX_JOBS_FOR_ANALYSIS = 500
// Helper function for the rooflineHeatmap GraphQL query placed here so that schema.resolvers.go is not too full.
@@ -208,10 +27,11 @@ func (r *queryResolver) rooflineHeatmap(
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
log.Error("Error while querying jobs for roofline")
return nil, err
}
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
return nil, fmt.Errorf("too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
}
fcols, frows := float64(cols), float64(rows)
@@ -228,19 +48,20 @@ func (r *queryResolver) rooflineHeatmap(
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
if err != nil {
log.Error("Error while loading metrics for roofline")
return nil, err
}
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
if flops_ == nil && membw_ == nil {
return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %d", job.ID)
return nil, fmt.Errorf("GRAPH/STATS > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
}
flops, ok1 := flops_["node"]
membw, ok2 := membw_["node"]
if !ok1 || !ok2 {
// TODO/FIXME:
return nil, errors.New("todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
return nil, errors.New("GRAPH/STATS > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
}
for n := 0; n < len(flops.Series); n++ {
@@ -272,10 +93,11 @@ func (r *queryResolver) rooflineHeatmap(
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
log.Error("Error while querying jobs for footprint")
return nil, err
}
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
return nil, fmt.Errorf("too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
}
avgs := make([][]schema.Float, len(metrics))
@@ -290,6 +112,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
}
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Error("Error while loading averages for footprint")
return nil, err
}


@@ -16,6 +16,7 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@@ -78,6 +79,7 @@ func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
var config CCMetricStoreConfig
if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
return err
}
@@ -124,11 +126,13 @@ func (ccms *CCMetricStore) doRequest(
buf := &bytes.Buffer{}
if err := json.NewEncoder(buf).Encode(body); err != nil {
log.Warn("Error while encoding request body")
return nil, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf)
if err != nil {
log.Warn("Error while building request body")
return nil, err
}
if ccms.jwt != "" {
@@ -137,6 +141,7 @@ func (ccms *CCMetricStore) doRequest(
res, err := ccms.client.Do(req)
if err != nil {
log.Error("Error while performing request")
return nil, err
}
@@ -146,6 +151,7 @@ func (ccms *CCMetricStore) doRequest(
var resBody ApiQueryResponse
if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil {
log.Warn("Error while decoding result body")
return nil, err
}
@@ -160,6 +166,7 @@ func (ccms *CCMetricStore) LoadData(
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
if err != nil {
log.Warn("Error while building queries")
return nil, err
}
@@ -174,6 +181,7 @@ func (ccms *CCMetricStore) LoadData(
resBody, err := ccms.doRequest(ctx, &req)
if err != nil {
log.Error("Error while performing request")
return nil, err
}
@@ -200,6 +208,7 @@ func (ccms *CCMetricStore) LoadData(
for _, res := range row {
if res.Error != nil {
/* Build list for "partial errors", if any */
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
continue
}
@@ -240,7 +249,8 @@ func (ccms *CCMetricStore) LoadData(
}
if len(errors) != 0 {
return jobData, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", "))
/* Returns list for "partial errors" */
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
}
return jobData, nil
@@ -272,8 +282,8 @@ func (ccms *CCMetricStore) buildQueries(
remoteName := ccms.toRemoteName(metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil {
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
log.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
continue
}
@@ -483,7 +493,7 @@ func (ccms *CCMetricStore) buildQueries(
continue
}
return nil, nil, fmt.Errorf("TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
}
}
}
@@ -498,6 +508,7 @@ func (ccms *CCMetricStore) LoadStats(
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode})
if err != nil {
log.Warn("Error while building query")
return nil, err
}
@@ -512,6 +523,7 @@ func (ccms *CCMetricStore) LoadStats(
resBody, err := ccms.doRequest(ctx, &req)
if err != nil {
log.Error("Error while performing request")
return nil, err
}
@@ -521,7 +533,7 @@ func (ccms *CCMetricStore) LoadStats(
metric := ccms.toLocalName(query.Metric)
data := res[0]
if data.Error != nil {
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
}
metricdata, ok := stats[metric]
@@ -531,7 +543,7 @@ func (ccms *CCMetricStore) LoadStats(
}
if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() {
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
}
metricdata[query.Hostname] = schema.MetricStatistics{
@@ -577,6 +589,7 @@ func (ccms *CCMetricStore) LoadNodeData(
resBody, err := ccms.doRequest(ctx, &req)
if err != nil {
log.Error("Error while performing request")
return nil, err
}
@@ -593,11 +606,12 @@ func (ccms *CCMetricStore) LoadNodeData(
metric := ccms.toLocalName(query.Metric)
qdata := res[0]
if qdata.Error != nil {
/* Build list for "partial errors", if any */
errors = append(errors, fmt.Sprintf("fetching %s for node %s failed: %s", metric, query.Hostname, *qdata.Error))
}
if qdata.Avg.IsNaN() || qdata.Min.IsNaN() || qdata.Max.IsNaN() {
// return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
// return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
qdata.Avg, qdata.Min, qdata.Max = 0., 0., 0.
}
@@ -626,7 +640,8 @@ func (ccms *CCMetricStore) LoadNodeData(
}
if len(errors) != 0 {
return data, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", "))
/* Returns list of "partial errors" */
return data, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
}
return data, nil

View File

@@ -10,11 +10,11 @@ import (
"encoding/json"
"errors"
"fmt"
"log"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
@@ -37,6 +37,7 @@ type InfluxDBv2DataRepository struct {
func (idb *InfluxDBv2DataRepository) Init(rawConfig json.RawMessage) error {
var config InfluxDBv2DataRepositoryConfig
if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
return err
}
@@ -71,7 +72,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil {
// TODO
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
}
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
}
@@ -84,7 +85,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
switch scope {
case "node":
// Get Finest Granularity, Group By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows
// log.Println("Note: Scope 'node' requested. ")
// log.Info("Scope 'node' requested. ")
query = fmt.Sprintf(`
from(bucket: "%s")
|> range(start: %s, stop: %s)
@@ -97,10 +98,10 @@ func (idb *InfluxDBv2DataRepository) LoadData(
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
measurementsCond, hostsCond)
case "socket":
log.Println("Note: Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
log.Info("Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
continue
case "core":
log.Println("Note: Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
log.Info(" Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
continue
// Get Finest Granularity only, Set NULL to 0.0
// query = fmt.Sprintf(`
@@ -114,13 +115,14 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
// measurementsCond, hostsCond)
default:
log.Println("Note: Unknown Scope requested: Will return 'node' scope. ")
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
continue
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node'")
// return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support other scopes than 'node'")
}
rows, err := idb.queryClient.Query(ctx, query)
if err != nil {
log.Error("Error while performing query")
return nil, err
}
@@ -191,6 +193,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// hostSeries.Data = append(hostSeries.Data, schema.Float(val))
// }
default:
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
continue
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'")
}
@@ -201,21 +204,22 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// Get Stats
stats, err := idb.LoadStats(job, metrics, ctx)
if err != nil {
log.Warn("Error while loading statistics")
return nil, err
}
for _, scope := range scopes {
if scope == "node" { // No 'socket/core' support yet
for metric, nodes := range stats {
// log.Println(fmt.Sprintf("<< Add Stats for : Field %s >>", metric))
// log.Debugf("<< Add Stats for : Field %s >>", metric)
for node, stats := range nodes {
// log.Println(fmt.Sprintf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg ))
// log.Debugf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg )
for index := range jobData[metric][scope].Series {
// log.Println(fmt.Sprintf("<< Try to add Stats to Series in Position %d >>", index))
// log.Debugf("<< Try to add Stats to Series in Position %d >>", index)
if jobData[metric][scope].Series[index].Hostname == node {
// log.Println(fmt.Sprintf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname))
jobData[metric][scope].Series[index].Statistics = schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
// log.Println(fmt.Sprintf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg))
// log.Debugf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname)
jobData[metric][scope].Series[index].Statistics = &schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
// log.Debugf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg)
}
}
}
@@ -227,9 +231,9 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// for _, scope := range scopes {
// for _, met := range metrics {
// for _, series := range jobData[met][scope].Series {
// log.Println(fmt.Sprintf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
// log.Debugf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
// len(series.Data), met, series.Hostname, scope,
// series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg))
// series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg)
// }
// }
// }
@@ -248,7 +252,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil {
// TODO
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
}
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
}
@@ -257,7 +261,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
// lenMet := len(metrics)
for _, metric := range metrics {
// log.Println(fmt.Sprintf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet))
// log.Debugf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)
query := fmt.Sprintf(`
data = from(bucket: "%s")
@@ -274,6 +278,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
rows, err := idb.queryClient.Query(ctx, query)
if err != nil {
log.Error("Error while performing query")
return nil, err
}
@@ -284,17 +289,17 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
avg, avgok := row.ValueByKey("avg").(float64)
if !avgok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg))
// log.Debugf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)
avg = 0.0
}
min, minok := row.ValueByKey("min").(float64)
if !minok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min))
// log.Debugf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)
min = 0.0
}
max, maxok := row.ValueByKey("max").(float64)
if !maxok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max))
// log.Debugf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)
max = 0.0
}
@@ -318,7 +323,7 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
// TODO: Implement to be used in Analysis- and System/Node-View
log.Println(fmt.Sprintf("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes))
log.Infof("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes)
return nil, errors.New("unimplemented for InfluxDBv2DataRepository")
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
}

View File

@@ -46,6 +46,7 @@ func Init(disableArchive bool) error {
Kind string `json:"kind"`
}
if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil {
log.Warn("Error while unmarshaling raw json MetricDataRepository")
return err
}
@@ -55,13 +56,16 @@ func Init(disableArchive bool) error {
mdr = &CCMetricStore{}
case "influxdb":
mdr = &InfluxDBv2DataRepository{}
case "prometheus":
mdr = &PrometheusDataRepository{}
case "test":
mdr = &TestMetricDataRepository{}
default:
return fmt.Errorf("unkown metric data repository '%s' for cluster '%s'", kind.Kind, cluster.Name)
return fmt.Errorf("METRICDATA/METRICDATA > Unknown MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
}
if err := mdr.Init(cluster.MetricDataRepository); err != nil {
log.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
return err
}
metricDataRepos[cluster.Name] = mdr
@@ -88,7 +92,7 @@ func LoadData(job *schema.Job,
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster), 0, 0
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}
if scopes == nil {
@@ -105,8 +109,9 @@ func LoadData(job *schema.Job,
jd, err = repo.LoadData(job, metrics, scopes, ctx)
if err != nil {
if len(jd) != 0 {
log.Errorf("partial error: %s", err.Error())
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading job data from metric repository")
return err, 0, 0
}
}
@@ -114,6 +119,7 @@ func LoadData(job *schema.Job,
} else {
jd, err = archive.GetHandle().LoadJobData(job)
if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0
}
@@ -156,10 +162,12 @@ func LoadData(job *schema.Job,
}
prepareJobData(job, jd, scopes)
return jd, ttl, size
})
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
@@ -179,11 +187,12 @@ func LoadAverages(
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
}
stats, err := repo.LoadStats(job, metrics, ctx)
if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err
}
@@ -214,7 +223,7 @@ func LoadNodeData(
repo, ok := metricDataRepos[cluster]
if !ok {
return nil, fmt.Errorf("no metric data repository configured for '%s'", cluster)
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
}
if metrics == nil {
@@ -226,14 +235,15 @@ func LoadNodeData(
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Errorf("partial error: %s", err.Error())
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading node data from metric repository")
return nil, err
}
}
if data == nil {
return nil, fmt.Errorf("the metric data repository for '%s' does not support this query", cluster)
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
}
return data, nil
@@ -300,6 +310,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil {
log.Error("Error wile loading job data for archiving")
return nil, err
}

View File

@@ -0,0 +1,446 @@
// Copyright (C) 2022 DKRZ
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricdata
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"math"
"net/http"
"os"
"regexp"
"sort"
"strings"
"sync"
"text/template"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
promapi "github.com/prometheus/client_golang/api"
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
promcfg "github.com/prometheus/common/config"
promm "github.com/prometheus/common/model"
)
type PrometheusDataRepositoryConfig struct {
Url string `json:"url"`
Username string `json:"username,omitempty"`
Suffix string `json:"suffix,omitempty"`
Templates map[string]string `json:"query-templates"`
}
type PrometheusDataRepository struct {
client promapi.Client
queryClient promv1.API
suffix string
templates map[string]*template.Template
}
type PromQLArgs struct {
Nodes string
}
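// Trie is a recursive rune trie over hostnames; nodeRegex below collapses it
// into a compact regular expression.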
type Trie map[rune]Trie
var logOnce sync.Once
func contains(s []schema.MetricScope, str schema.MetricScope) bool {
for _, v := range s {
if v == str {
return true
}
}
return false
}
func MinMaxMean(data []schema.Float) (float64, float64, float64) {
if len(data) == 0 {
return 0.0, 0.0, 0.0
}
min := math.MaxFloat64
max := -math.MaxFloat64
var sum float64
var n float64
for _, val := range data {
if val.IsNaN() {
continue
}
sum += float64(val)
n += 1
if float64(val) > max {
max = float64(val)
}
if float64(val) < min {
min = float64(val)
}
}
return min, max, sum / n
}
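// Example (NaN samples are skipped): MinMaxMean([]schema.Float{1, 2, schema.NaN, 3})
// returns (1, 3, 2).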
// Rewritten from
// https://github.com/ermanh/trieregex/blob/master/trieregex/trieregex.py
func nodeRegex(nodes []string) string {
root := Trie{}
// add runes of each compute node to trie
for _, node := range nodes {
_trie := root
for _, c := range node {
if _, ok := _trie[c]; !ok {
_trie[c] = Trie{}
}
_trie = _trie[c]
}
_trie['*'] = Trie{}
}
// recursively build regex from rune trie
var trieRegex func(trie Trie, reset bool) string
trieRegex = func(trie Trie, reset bool) string {
if reset {
trie = root
}
if len(trie) == 0 {
return ""
}
if len(trie) == 1 {
for key, _trie := range trie {
if key == '*' {
return ""
}
return regexp.QuoteMeta(string(key)) + trieRegex(_trie, false)
}
} else {
sequences := []string{}
for key, _trie := range trie {
if key != '*' {
sequences = append(sequences, regexp.QuoteMeta(string(key))+trieRegex(_trie, false))
}
}
sort.Slice(sequences, func(i, j int) bool {
// longer alternatives first, then lexicographic for equal lengths
if len(sequences[i]) != len(sequences[j]) {
return len(sequences[i]) > len(sequences[j])
}
return sequences[i] < sequences[j]
})
var result string
// single edge from this tree node
if len(sequences) == 1 {
result = sequences[0]
if len(result) > 1 {
result = "(?:" + result + ")"
}
// multiple edges, each length 1
} else if s := strings.Join(sequences, ""); len(s) == len(sequences) {
// char or numeric range
if len(s)-1 == int(s[len(s)-1])-int(s[0]) {
result = fmt.Sprintf("[%c-%c]", s[0], s[len(s)-1])
// char or numeric set
} else {
result = "[" + s + "]"
}
// multiple edges of different lengths
} else {
result = "(?:" + strings.Join(sequences, "|") + ")"
}
if _, ok := trie['*']; ok {
result += "?"
}
return result
}
return ""
}
return trieRegex(root, true)
}
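// Illustrative sketch (hypothetical hostnames): shared prefixes collapse and
// single-rune alternatives become character ranges, e.g.
//
//	pattern := nodeRegex([]string{"node01", "node02", "node03"})
//	// pattern == "node0[1-3]"
//	matched, _ := regexp.MatchString("^"+pattern+"$", "node02") // true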
func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
var config PrometheusDataRepositoryConfig
// parse config
if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
return err
}
// support basic authentication
var rt http.RoundTripper = nil
if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
prom_pw := promcfg.Secret(prom_pw)
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
} else {
if config.Username != "" {
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
}
}
// init client
client, err := promapi.NewClient(promapi.Config{
Address: config.Url,
RoundTripper: rt,
})
if err != nil {
log.Error("Error while initializing new prometheus client")
return err
}
// init query client
pdb.client = client
pdb.queryClient = promv1.NewAPI(pdb.client)
// site config
pdb.suffix = config.Suffix
// init query templates
pdb.templates = make(map[string]*template.Template)
for metric, templ := range config.Templates {
pdb.templates[metric], err = template.New(metric).Parse(templ)
if err == nil {
log.Debugf("Added PromQL template for %s: %s", metric, templ)
} else {
log.Warnf("Failed to parse PromQL template %s for metric %s", templ, metric)
}
}
return nil
}
// TODO: respect scope argument
func (pdb *PrometheusDataRepository) FormatQuery(
metric string,
scope schema.MetricScope,
nodes []string,
cluster string) (string, error) {
args := PromQLArgs{}
if len(nodes) > 0 {
args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
} else {
args.Nodes = fmt.Sprintf(".*%s", pdb.suffix)
}
buf := &bytes.Buffer{}
if templ, ok := pdb.templates[metric]; ok {
err := templ.Execute(buf, args)
if err != nil {
return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > Error compiling template %v", templ))
} else {
query := buf.String()
log.Debugf("PromQL: %s", query)
return query, nil
}
} else {
return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > No PromQL for metric %s configured.", metric))
}
}
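// Configuration sketch (metric name and PromQL body are assumptions for
// illustration, not shipped defaults): every entry in "query-templates" is a
// Go text/template that may reference {{.Nodes}}, which FormatQuery fills
// with the node regex plus the configured suffix:
//
//	"query-templates": {
//	  "mem_used": "collectd_memory{memory=\"used\", exported_instance=~\"{{.Nodes}}\"}"
//	}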
// Convert PromAPI row to CC schema.Series
func (pdb *PrometheusDataRepository) RowToSeries(
from time.Time,
step int64,
steps int64,
row *promm.SampleStream) schema.Series {
ts := from.Unix()
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
// init array of expected length with NaN
values := make([]schema.Float, steps+1)
for i := range values {
values[i] = schema.NaN
}
// copy recorded values from prom sample pair
for _, v := range row.Values {
idx := (v.Timestamp.Unix() - ts) / step
values[idx] = schema.Float(v.Value)
}
min, max, mean := MinMaxMean(values)
// output struct
return schema.Series{
Hostname: hostname,
Data: values,
Statistics: &schema.MetricStatistics{
Avg: mean,
Min: min,
Max: max,
},
}
}
func (pdb *PrometheusDataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
// TODO respect requested scope
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
scopes = append(scopes, schema.MetricScopeNode)
}
jobData := make(schema.JobData)
// parse job specs
nodes := make([]string, len(job.Resources))
for i, resource := range job.Resources {
nodes[i] = resource.Hostname
}
from := job.StartTime
to := job.StartTime.Add(time.Duration(job.Duration) * time.Second)
for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func() {
log.Infof("Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue
}
for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(job.Cluster, metric)
if metricConfig == nil {
log.Warnf("Error in LoadData: Metric %s for cluster %s not configured", metric, job.Cluster)
return nil, errors.New("Prometheus config error")
}
query, err := pdb.FormatQuery(metric, scope, nodes, job.Cluster)
if err != nil {
log.Warn("Error while formatting prometheus query")
return nil, err
}
// ranged query over all job nodes
r := promv1.Range{
Start: from,
End: to,
Step: time.Duration(metricConfig.Timestep * 1e9),
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
return nil, errors.New("Prometheus query error")
}
if len(warnings) > 0 {
log.Warnf("Warnings: %v\n", warnings)
}
// init data structures
if _, ok := jobData[metric]; !ok {
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
}
jobMetric, ok := jobData[metric][scope]
if !ok {
jobMetric = &schema.JobMetric{
Unit: metricConfig.Unit,
Scope: scope,
Timestep: metricConfig.Timestep,
Series: make([]schema.Series, 0),
}
jobData[metric][scope] = jobMetric
}
step := int64(metricConfig.Timestep)
steps := int64(to.Sub(from).Seconds()) / step
// iter rows of host, metric, values
for _, row := range result.(promm.Matrix) {
jobMetric.Series = append(jobMetric.Series,
pdb.RowToSeries(from, step, steps, row))
}
// sort by hostname to get uniform coloring
sort.Slice(jobMetric.Series, func(i, j int) bool {
return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname)
})
}
}
return jobData, nil
}
// TODO change implementation to precomputed/cached stats
func (pdb *PrometheusDataRepository) LoadStats(
job *schema.Job,
metrics []string,
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
// map of metrics of nodes of stats
stats := map[string]map[string]schema.MetricStatistics{}
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
if err != nil {
log.Warn("Error while loading job for stats")
return nil, err
}
for metric, metricData := range data {
stats[metric] = make(map[string]schema.MetricStatistics)
for _, series := range metricData[schema.MetricScopeNode].Series {
stats[metric][series.Hostname] = *series.Statistics
}
}
return stats, nil
}
func (pdb *PrometheusDataRepository) LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
t0 := time.Now()
// Map of hosts of metrics of value slices
data := make(map[string]map[string][]*schema.JobMetric)
// query db for each metric
// TODO: scopes always seem to be empty
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
scopes = append(scopes, schema.MetricScopeNode)
}
for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func() {
log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue
}
for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(cluster, metric)
if metricConfig == nil {
log.Warnf("Error in LoadNodeData: Metric %s for cluster %s not configured", metric, cluster)
return nil, errors.New("Prometheus config error")
}
query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
if err != nil {
log.Warn("Error while formatting prometheus query")
return nil, err
}
// ranged query over all nodes
r := promv1.Range{
Start: from,
End: to,
Step: time.Duration(metricConfig.Timestep * 1e9),
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
return nil, errors.New("Prometheus query error")
}
if len(warnings) > 0 {
log.Warnf("Warnings: %v\n", warnings)
}
step := int64(metricConfig.Timestep)
steps := int64(to.Sub(from).Seconds()) / step
// iter rows of host, metric, values
for _, row := range result.(promm.Matrix) {
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
hostdata, ok := data[hostname]
if !ok {
hostdata = make(map[string][]*schema.JobMetric)
data[hostname] = hostdata
}
// output per host and metric
hostdata[metric] = append(hostdata[metric], &schema.JobMetric{
Unit: metricConfig.Unit,
Scope: scope,
Timestep: metricConfig.Timestep,
Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)},
},
)
}
}
}
t1 := time.Since(t0)
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
return data, nil
}

View File

@@ -5,12 +5,15 @@
package repository
import (
"database/sql"
"fmt"
"log"
"sync"
"time"
"github.com/jmoiron/sqlx"
"github.com/mattn/go-sqlite3"
"github.com/qustavo/sqlhooks/v2"
)
var (
@@ -19,7 +22,8 @@ var (
)
type DBConnection struct {
DB *sqlx.DB
DB *sqlx.DB
Driver string
}
func Connect(driver string, db string) {
@@ -28,7 +32,9 @@ func Connect(driver string, db string) {
dbConnOnce.Do(func() {
if driver == "sqlite3" {
dbHandle, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", db))
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
dbHandle, err = sqlx.Open("sqlite3WithHooks", fmt.Sprintf("%s?_foreign_keys=on", db))
// dbHandle, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", db))
if err != nil {
log.Fatal(err)
}
@@ -39,7 +45,7 @@ func Connect(driver string, db string) {
} else if driver == "mysql" {
dbHandle, err = sqlx.Open("mysql", fmt.Sprintf("%s?multiStatements=true", db))
if err != nil {
log.Fatal(err)
log.Fatalf("sqlx.Open() error: %v", err)
}
dbHandle.SetConnMaxLifetime(time.Minute * 3)
@@ -49,7 +55,8 @@ func Connect(driver string, db string) {
log.Fatalf("unsupported database driver: %s", driver)
}
dbConnInstance = &DBConnection{DB: dbHandle}
dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
checkDBVersion(driver, dbHandle.DB)
})
}

View File

@@ -0,0 +1,28 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
// Hooks satisfies the sqlhook.Hooks interface
type Hooks struct{}
// Before hook will print the query with its args and return the context with the timestamp
func (h *Hooks) Before(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
log.Infof("SQL query %s %q", query, args)
return context.WithValue(ctx, "begin", time.Now()), nil
}
// After hook will get the timestamp registered on the Before hook and print the elapsed time
func (h *Hooks) After(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
begin := ctx.Value("begin").(time.Time)
log.Infof("Took: %s\n", time.Since(begin))
return ctx, nil
}
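// Usage sketch, mirroring Connect() in the repository package (the database
// path is a placeholder): wrapping the sqlite3 driver routes every query
// through the Before/After hooks above:
//
//	sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
//	db, err := sqlx.Open("sqlite3WithHooks", fmt.Sprintf("%s?_foreign_keys=on", dbPath))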

View File

@@ -20,67 +20,6 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/units"
)
// `AUTO_INCREMENT` is in a comment because of this hack:
// https://stackoverflow.com/a/41028314 (sqlite creates unique ids automatically)
const JobsDBSchema string = `
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tag;
CREATE TABLE job (
id INTEGER PRIMARY KEY /*!40101 AUTO_INCREMENT */,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
` + "`partition`" + ` VARCHAR(255) NOT NULL, -- partition is a keyword in mysql -.-
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
CREATE TABLE tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
CREATE TABLE jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
`
// Indexes are created after the job-archive is traversed for faster inserts.
const JobsDbIndexes string = `
CREATE INDEX job_by_user ON job (user);
CREATE INDEX job_by_starttime ON job (start_time);
CREATE INDEX job_by_job_id ON job (job_id);
CREATE INDEX job_by_state ON job (job_state);
`
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
@@ -96,40 +35,44 @@ func HandleImportFlag(flag string) error {
for _, pair := range strings.Split(flag, ",") {
files := strings.Split(pair, ":")
if len(files) != 2 {
return fmt.Errorf("invalid import flag format")
return fmt.Errorf("REPOSITORY/INIT > invalid import flag format")
}
raw, err := os.ReadFile(files[0])
if err != nil {
log.Warn("Error while reading metadata file for import")
return err
}
// if config.Keys.Validate {
if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil {
return fmt.Errorf("validate job meta: %v", err)
if config.Keys.Validate {
if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil {
return fmt.Errorf("REPOSITORY/INIT > validate job meta: %v", err)
}
}
// }
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
if err := dec.Decode(&jobMeta); err != nil {
log.Warn("Error while decoding raw json metadata for import")
return err
}
raw, err = os.ReadFile(files[1])
if err != nil {
log.Warn("Error while reading jobdata file for import")
return err
}
if config.Keys.Validate {
if err := schema.Validate(schema.Data, bytes.NewReader(raw)); err != nil {
return fmt.Errorf("validate job data: %v", err)
return fmt.Errorf("REPOSITORY/INIT > validate job data: %v", err)
}
}
dec = json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobData := schema.JobData{}
if err := dec.Decode(&jobData); err != nil {
log.Warn("Error while decoding raw json jobdata for import")
return err
}
@@ -138,10 +81,11 @@ func HandleImportFlag(flag string) error {
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
if job, err := GetJobRepository().Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
if err != nil {
log.Warn("Error while finding job in jobRepository")
return err
}
return fmt.Errorf("a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
}
job := schema.Job{
@@ -157,38 +101,45 @@ func HandleImportFlag(flag string) error {
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Warn("Error while marshaling job resources")
return err
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
log.Warn("Error while marshaling job metadata")
return err
}
if err := SanityChecks(&job.BaseJob); err != nil {
log.Warn("BaseJob SanityChecks failed")
return err
}
if err := archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
log.Error("Error while importing job")
return err
}
res, err := GetConnection().DB.NamedExec(NamedJobInsert, job)
if err != nil {
log.Warn("Error while NamedJobInsert")
return err
}
id, err := res.LastInsertId()
if err != nil {
log.Warn("Error while getting last insert ID")
return err
}
for _, tag := range job.Tags {
if _, err := GetJobRepository().AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
log.Error("Error while adding or creating tag")
return err
}
}
log.Infof("Successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
log.Infof("successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
}
return nil
}
@@ -200,21 +151,17 @@ func InitDB() error {
starttime := time.Now()
log.Print("Building job table...")
// Basic database structure:
_, err := db.DB.Exec(JobsDBSchema)
if err != nil {
return err
}
// Inserts are bundled into transactions because in sqlite,
// that speeds up inserts A LOT.
tx, err := db.DB.Beginx()
if err != nil {
log.Warn("Error while bundling transactions")
return err
}
stmt, err := tx.PrepareNamed(NamedJobInsert)
if err != nil {
log.Warn("Error while preparing namedJobInsert")
return err
}
tags := make(map[string]int64)
@@ -236,12 +183,14 @@ func InitDB() error {
if i%10 == 0 {
if tx != nil {
if err := tx.Commit(); err != nil {
log.Warn("Error while committing transactions for jobMeta")
return err
}
}
tx, err = db.DB.Beginx()
if err != nil {
log.Warn("Error while bundling transactions for jobMeta")
return err
}
@@ -264,34 +213,34 @@ func InitDB() error {
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
if err := SanityChecks(&job.BaseJob); err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
res, err := stmt.Exec(job)
if err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
id, err := res.LastInsertId()
if err != nil {
log.Errorf("repository initDB()- %v", err)
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
@@ -302,16 +251,19 @@ func InitDB() error {
if !ok {
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
if err != nil {
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
return err
}
tagId, err = res.LastInsertId()
if err != nil {
log.Warn("Error while getting last insert ID")
return err
}
tags[tagstr] = tagId
}
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", id, tagId)
return err
}
}
@@ -322,16 +274,11 @@ func InitDB() error {
}
if errorOccured > 0 {
log.Errorf("Error in import of %d jobs!", errorOccured)
log.Warnf("Error in import of %d jobs!", errorOccured)
}
if err := tx.Commit(); err != nil {
return err
}
// Create indexes after inserts so that they do not
// need to be continually updated.
if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
log.Warn("Error while committing SQL transactions")
return err
}
@@ -342,13 +289,14 @@ func InitDB() error {
// This function also sets the subcluster if necessary!
func SanityChecks(job *schema.BaseJob) error {
if c := archive.GetCluster(job.Cluster); c == nil {
return fmt.Errorf("no such cluster: %#v", job.Cluster)
return fmt.Errorf("no such cluster: %v", job.Cluster)
}
if err := archive.AssignSubCluster(job); err != nil {
log.Warn("Error while assigning subcluster to job")
return err
}
if !job.State.Valid() {
return fmt.Errorf("not a valid job state: %#v", job.State)
return fmt.Errorf("not a valid job state: %v", job.State)
}
if len(job.Resources) == 0 || len(job.User) == 0 {
return fmt.Errorf("'resources' and 'user' should not be empty")

View File

@@ -14,8 +14,12 @@ import (
"sync"
"time"
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -29,10 +33,14 @@ var (
)
type JobRepository struct {
DB *sqlx.DB
DB *sqlx.DB
driver string
stmtCache *sq.StmtCache
cache *lrucache.Cache
archiveChannel chan *schema.Job
archivePending sync.WaitGroup
}
func GetJobRepository() *JobRepository {
@@ -40,10 +48,15 @@ func GetJobRepository() *JobRepository {
db := GetConnection()
jobRepoInstance = &JobRepository{
DB: db.DB,
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
DB: db.DB,
driver: db.Driver,
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
archiveChannel: make(chan *schema.Job, 128),
}
// start archiving worker
go jobRepoInstance.archivingWorker()
})
return jobRepoInstance
@@ -60,14 +73,20 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources /*&job.MetaData*/); err != nil {
&job.Duration, &job.Walltime, &job.RawResources /*&job.RawMetaData*/); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
log.Warn("Error while unmarhsaling raw resources json")
return nil, err
}
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
// return nil, err
// }
job.StartTime = time.Unix(job.StartTimeUnix, 0)
if job.Duration == 0 && job.State == schema.JobStateRunning {
job.Duration = int32(time.Since(job.StartTime).Seconds())
@@ -77,11 +96,14 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
return job, nil
}
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
func (r *JobRepository) FetchJobName(job *schema.Job) (*string, error) {
start := time.Now()
cachekey := fmt.Sprintf("metadata:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.MetaData = cached.(map[string]string)
return job.MetaData, nil
if jobName := job.MetaData["jobName"]; jobName != "" {
return &jobName, nil
}
}
if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
@@ -98,6 +120,40 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
}
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
log.Infof("Timer FetchJobName %s", time.Since(start))
if jobName := job.MetaData["jobName"]; jobName != "" {
return &jobName, nil
} else {
return new(string), nil
}
}
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
start := time.Now()
cachekey := fmt.Sprintf("metadata:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.MetaData = cached.(map[string]string)
return job.MetaData, nil
}
if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawMetaData); err != nil {
log.Warn("Error while scanning for job metadata")
return nil, err
}
if len(job.RawMetaData) == 0 {
return nil, nil
}
if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
log.Warn("Error while unmarshaling raw metadata json")
return nil, err
}
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
log.Infof("Timer FetchMetadata %s", time.Since(start))
return job.MetaData, nil
}
@@ -106,6 +162,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
r.cache.Del(cachekey)
if job.MetaData == nil {
if _, err = r.FetchMetadata(job); err != nil {
log.Warnf("Error while fetching metadata for job, DB ID '%v'", job.ID)
return err
}
}
@@ -122,10 +179,12 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
}
if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
log.Warnf("Error while marshaling metadata for job, DB ID '%v'", job.ID)
return err
}
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
return err
}
@@ -143,6 +202,7 @@ func (r *JobRepository) Find(
cluster *string,
startTime *int64) (*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
@@ -153,9 +213,50 @@ func (r *JobRepository) Find(
q = q.Where("job.start_time = ?", *startTime)
}
log.Infof("Timer Find %s", time.Since(start))
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindAll executes a SQL query to find all batch jobs that share a batch job id.
// The jobs are queried using the batch job id and, if given, the cluster name
// and the start time of the job in UNIX epoch time seconds.
// It returns a slice of pointers to schema.Job data structures and an error variable.
// An empty slice is returned if no job was found.
func (r *JobRepository) FindAll(
jobId *int64,
cluster *string,
startTime *int64) ([]*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
}
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
jobs := make([]*schema.Job, 0, 10)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}
log.Infof("Timer FindAll %s", time.Since(start))
return jobs, nil
}
// FindById executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
@@ -171,12 +272,12 @@ func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return -1, fmt.Errorf("encoding resources field failed: %w", err)
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return -1, fmt.Errorf("encoding metaData field failed: %w", err)
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
}
res, err := r.DB.NamedExec(`INSERT INTO job (
@@ -210,6 +311,29 @@ func (r *JobRepository) Stop(
return
}
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
var cnt int
qs := fmt.Sprintf("SELECT count(*) FROM job WHERE job.start_time < %d", startTime)
err := r.DB.Get(&cnt, qs) // ignore the error here; it will occur again in the delete statement below
_, err = r.DB.Exec(`DELETE FROM job WHERE job.start_time < ?`, startTime)
if err != nil {
log.Errorf(" DeleteJobsBefore(%d): error %#v", startTime, err)
} else {
log.Infof("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
}
return cnt, err
}
func (r *JobRepository) DeleteJobById(id int64) error {
_, err := r.DB.Exec(`DELETE FROM job WHERE job.id = ?`, id)
if err != nil {
log.Errorf("DeleteJobById(%d): error %#v", id, err)
} else {
log.Infof("DeleteJobById(%d): Success", id)
}
return err
}
// TODO: Use node hours instead: SELECT job.user, sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN CAST(strftime('%s', 'now') AS INTEGER) - job.start_time ELSE job.duration END)) as x FROM job GROUP BY user ORDER BY x DESC;
func (r *JobRepository) CountGroupedJobs(
ctx context.Context,
@@ -218,6 +342,7 @@ func (r *JobRepository) CountGroupedJobs(
weight *model.Weights,
limit *int) (map[string]int, error) {
start := time.Now()
if !aggreg.IsValid() {
return nil, errors.New("invalid aggregate")
}
@@ -232,11 +357,17 @@ func (r *JobRepository) CountGroupedJobs(
now := time.Now().Unix()
count = fmt.Sprintf(`sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) as count`, now)
runner = r.DB
default:
log.Infof("CountGroupedJobs() Weight %v unknown.", *weight)
}
}
q := sq.Select("job."+string(aggreg), count).From("job").GroupBy("job." + string(aggreg)).OrderBy("count DESC")
q = SecurityCheck(ctx, q)
q, qerr := SecurityCheck(ctx, sq.Select("job."+string(aggreg), count).From("job").GroupBy("job."+string(aggreg)).OrderBy("count DESC"))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
q = BuildWhereClause(f, q)
}
@@ -247,6 +378,7 @@ func (r *JobRepository) CountGroupedJobs(
counts := map[string]int{}
rows, err := q.RunWith(runner).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
@@ -254,12 +386,14 @@ func (r *JobRepository) CountGroupedJobs(
var group string
var count int
if err := rows.Scan(&group, &count); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
counts[group] = count
}
log.Infof("Timer CountGroupedJobs %s", time.Since(start))
return counts, nil
}
@@ -273,7 +407,7 @@ func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32
}
// MarkArchived updates the job with the database id jobId, setting its monitoring status and the provided metric statistics.
func (r *JobRepository) Archive(
func (r *JobRepository) MarkArchived(
jobId int64,
monitoringStatus int32,
metricStats map[string]schema.JobStatistics) error {
@@ -296,56 +430,165 @@ func (r *JobRepository) Archive(
stmt = stmt.Set("net_bw_avg", stats.Avg)
case "file_bw":
stmt = stmt.Set("file_bw_avg", stats.Avg)
default:
log.Infof("MarkArchived() Metric '%v' unknown", metric)
}
}
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
log.Warn("Error while marking job as archived")
return err
}
return nil
}
var ErrNotFound = errors.New("no such job or user")
// Archiving worker thread
func (r *JobRepository) archivingWorker() {
for {
select {
case job, ok := <-r.archiveChannel:
if !ok {
// channel closed: stop the worker ('break' would only leave the select)
return
}
// not using meta data, called to load JobMeta into Cache?
// will fail if job meta not in repository
if _, err := r.FetchMetadata(job); err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
// FindJobOrUser returns a job database ID or a username if a job or user
// matches the search term. As 0 is a valid job id, check if username is ""
// instead in order to check what matched. If nothing matches the search,
// `ErrNotFound` is returned.
func (r *JobRepository) FindJobOrUser(
ctx context.Context,
searchterm string) (job int64, username string, err error) {
// metricdata.ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
// TODO: Maybe use context with cancel/timeout here
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
if err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
user := auth.GetUser(ctx)
if id, err := strconv.Atoi(searchterm); err == nil {
qb := sq.Select("job.id").From("job").Where("job.job_id = ?", id)
if user != nil && !user.HasRole(auth.RoleAdmin) && !user.HasRole(auth.RoleSupport) {
qb = qb.Where("job.user = ?", user.Username)
}
// Update the jobs database entry one last time:
if err := r.MarkArchived(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
continue
}
err := qb.RunWith(r.stmtCache).QueryRow().Scan(&job)
if err != nil && err != sql.ErrNoRows {
return 0, "", err
} else if err == nil {
return job, "", nil
log.Printf("archiving job (dbid: %d) successful", job.ID)
r.archivePending.Done()
}
}
}
if user == nil || user.HasRole(auth.RoleAdmin) || user.HasRole(auth.RoleSupport) {
err := sq.Select("job.user").Distinct().From("job").
Where("job.user = ?", searchterm).
RunWith(r.stmtCache).QueryRow().Scan(&username)
if err != nil && err != sql.ErrNoRows {
return 0, "", err
} else if err == nil {
return 0, username, nil
// Trigger async archiving
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
r.archivePending.Add(1)
r.archiveChannel <- job
}
// Wait for background thread to finish pending archiving operations
func (r *JobRepository) WaitForArchiving() {
// wait for the worker to finish all pending archiving operations
r.archivePending.Wait()
}
var ErrNotFound = errors.New("no such jobname, project or user")
var ErrForbidden = errors.New("not authorized")
// FindUserOrProjectOrJobname returns a username, a projectId or a jobName if a user, project or job name matches the search term.
// If query is found to be an integer (= conversion to INT datatype succeeds), skip back to parent call
// If nothing matches the search, `ErrNotFound` is returned.
func (r *JobRepository) FindUserOrProjectOrJobname(ctx context.Context, searchterm string) (username string, project string, metasnip string, err error) {
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
return "", "", "", nil
} else { // Has to have letters and logged-in user for other guesses
user := auth.GetUser(ctx)
if user != nil {
// Find username in jobs (match)
uresult, _ := r.FindColumnValue(user, searchterm, "job", "user", "user", false)
if uresult != "" {
return uresult, "", "", nil
}
// Find username by name (like)
nresult, _ := r.FindColumnValue(user, searchterm, "user", "username", "name", true)
if nresult != "" {
return nresult, "", "", nil
}
// Find projectId in jobs (match)
presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false)
if presult != "" {
return "", presult, "", nil
}
// Still no return (or not authorized for above): Try JobName
// Match Metadata, on hit, parent method redirects to jobName GQL query
err := sq.Select("job.cluster").Distinct().From("job").
Where("job.meta_data LIKE ?", "%"+searchterm+"%").
RunWith(r.stmtCache).QueryRow().Scan(&metasnip)
if err != nil && err != sql.ErrNoRows {
return "", "", "", err
} else if err == nil {
return "", "", metasnip[0:1], nil
}
}
return "", "", "", ErrNotFound
}
}
return 0, "", ErrNotFound
func (r *JobRepository) FindColumnValue(user *auth.User, searchterm string, table string, selectColumn string, whereColumn string, isLike bool) (result string, err error) {
compareStr := " = ?"
query := searchterm
if isLike {
compareStr = " LIKE ?"
query = "%" + searchterm + "%"
}
if user.HasAnyRole([]auth.Role{auth.RoleAdmin, auth.RoleSupport, auth.RoleManager}) {
err := sq.Select(table+"."+selectColumn).Distinct().From(table).
Where(table+"."+whereColumn+compareStr, query).
RunWith(r.stmtCache).QueryRow().Scan(&result)
if err != nil && err != sql.ErrNoRows {
return "", err
} else if err == nil {
return result, nil
}
return "", ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query '%s' on table '%s' : Forbidden", user.Name, query, table)
return "", ErrForbidden
}
}
func (r *JobRepository) FindColumnValues(user *auth.User, query string, table string, selectColumn string, whereColumn string) (results []string, err error) {
emptyResult := make([]string, 0)
if user.HasAnyRole([]auth.Role{auth.RoleAdmin, auth.RoleSupport, auth.RoleManager}) {
rows, err := sq.Select(table+"."+selectColumn).Distinct().From(table).
Where(table+"."+whereColumn+" LIKE ?", fmt.Sprint("%", query, "%")).
RunWith(r.stmtCache).Query()
if err != nil && err != sql.ErrNoRows {
return emptyResult, err
} else if err == nil {
for rows.Next() {
var result string
err := rows.Scan(&result)
if err != nil {
rows.Close()
log.Warnf("Error while scanning rows: %v", err)
return emptyResult, err
}
results = append(results, result)
}
return results, nil
}
return emptyResult, ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query '%s' on table '%s' : Forbidden", user.Name, query, table)
return emptyResult, ErrForbidden
}
}
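// Illustrative sketch, not part of the diff: using the two generic column
// helpers. The search terms and the calling function are hypothetical.
func exampleColumnLookups(repo *JobRepository, user *auth.User) {
	// Exact match: is there a job whose user column equals "hpcuser1"?
	if uname, err := repo.FindColumnValue(user, "hpcuser1", "job", "user", "user", false); err == nil {
		log.Infof("found job owner: %s", uname)
	}
	// LIKE match: all usernames whose real name contains "Smith".
	if names, err := repo.FindColumnValues(user, "Smith", "user", "username", "name"); err == nil {
		log.Infof("matching usernames: %v", names)
	}
}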
func (r *JobRepository) Partitions(cluster string) ([]string, error) {
var err error
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
@@ -357,6 +600,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
if err != nil {
return nil, err
}
log.Infof("Timer Partitions %s", time.Since(start))
return partitions.([]string), nil
}
@@ -364,12 +608,14 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
// Hosts with zero jobs running on them will not show up!
func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) {
start := time.Now()
subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job").
Where("job.job_state = 'running'").
Where("job.cluster = ?", cluster).
RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
@@ -380,9 +626,11 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
var resources []*schema.Resource
var subcluster string
if err := rows.Scan(&raw, &subcluster); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if err := json.Unmarshal(raw, &resources); err != nil {
log.Warn("Error while unmarshaling raw resources json")
return nil, err
}
@@ -397,11 +645,13 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
}
}
log.Infof("Timer AllocatedNodes %s", time.Since(start))
return subclusters, nil
}
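// Illustrative sketch, not part of the diff: aggregating the map returned by
// AllocatedNodes. The meaning of the per-host int (allocated units such as
// HW-threads) is an assumption, since the counting loop is elided in this hunk.
func exampleAllocationSummary(repo *JobRepository, cluster string) error {
	bySubcluster, err := repo.AllocatedNodes(cluster)
	if err != nil {
		return err
	}
	for subcluster, hosts := range bySubcluster {
		total := 0
		for _, n := range hosts {
			total += n
		}
		log.Infof("%s: %d hosts with running jobs, %d allocated units", subcluster, len(hosts), total)
	}
	return nil
}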
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now()
res, err := sq.Update("job").
Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
Set("duration", 0).
@@ -411,16 +661,255 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)).
RunWith(r.DB).Exec()
if err != nil {
log.Warn("Error while stopping jobs exceeding walltime")
return err
}
rowsAffected, err := res.RowsAffected()
if err != nil {
log.Warn("Error while fetching affected rows after stopping due to exceeded walltime")
return err
}
if rowsAffected > 0 {
log.Warnf("%d jobs have been marked as failed due to running too long", rowsAffected)
log.Infof("%d jobs have been marked as failed due to running too long", rowsAffected)
}
log.Infof("Timer StopJobsExceedingWalltimeBy %s", time.Since(start))
return nil
}
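// Illustrative sketch, not part of the diff: invoking the walltime sweep
// periodically. The interval and the 10-second grace period are hypothetical;
// cc-backend wires this up elsewhere.
func exampleWalltimeSweeper(repo *JobRepository) {
	go func() {
		for range time.Tick(5 * time.Minute) {
			if err := repo.StopJobsExceedingWalltimeBy(10); err != nil {
				log.Warnf("walltime sweep failed: %v", err)
			}
		}
	}()
}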
// GraphQL validation should make sure that no unknown values can be specified.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
}
// JobsStatistics is a helper for the jobsStatistics GraphQL query, placed here to keep schema.resolvers.go from growing too large.
func (r *JobRepository) JobsStatistics(ctx context.Context,
filter []*model.JobFilter,
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
start := time.Now()
// In case `groupBy` is nil (not used), the model.JobsStatistics used is at the key '' (empty string)
stats := map[string]*model.JobsStatistics{}
var castType string
if r.driver == "sqlite3" {
castType = "int"
} else if r.driver == "mysql" {
castType = "unsigned"
}
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
for _, cluster := range archive.Clusters {
for _, subcluster := range cluster.SubClusters {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as %s)", subcluster.SocketsPerNode, subcluster.CoresPerSocket, castType)
var rawQuery sq.SelectBuilder
if groupBy == nil {
rawQuery = sq.Select(
"''",
"COUNT(job.id)",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
corehoursCol,
).From("job")
} else {
col := groupBy2column[*groupBy]
rawQuery = sq.Select(
col,
"COUNT(job.id)",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
corehoursCol,
).From("job").GroupBy(col)
}
rawQuery = rawQuery.
Where("job.cluster = ?", cluster.Name).
Where("job.subcluster = ?", subcluster.Name)
query, qerr := SecurityCheck(ctx, rawQuery)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
log.Warn("Error while querying DB for job statistics")
return nil, err
}
for rows.Next() {
var id sql.NullString
var jobs, walltime, corehours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
if s, ok := stats[id.String]; ok {
s.TotalJobs += int(jobs.Int64)
s.TotalWalltime += int(walltime.Int64)
s.TotalCoreHours += int(corehours.Int64)
} else {
stats[id.String] = &model.JobsStatistics{
ID: id.String,
TotalJobs: int(jobs.Int64),
TotalWalltime: int(walltime.Int64),
TotalCoreHours: int(corehours.Int64),
}
}
}
}
}
}
if groupBy == nil {
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", config.Keys.ShortRunningJobsDuration)
query, qerr := SecurityCheck(ctx, query)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = BuildWhereClause(f, query)
}
if err := query.RunWith(r.DB).QueryRow().Scan(&(stats[""].ShortJobs)); err != nil {
log.Warn("Error while scanning rows for short job stats")
return nil, err
}
} else {
col := groupBy2column[*groupBy]
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", config.Keys.ShortRunningJobsDuration)
query, qerr := SecurityCheck(ctx, query)
if qerr != nil {
return nil, qerr
}
for _, f := range filter {
query = BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.DB).Query()
if err != nil {
log.Warn("Error while querying jobs for short jobs")
return nil, err
}
for rows.Next() {
var id sql.NullString
var shortJobs sql.NullInt64
if err := rows.Scan(&id, &shortJobs); err != nil {
log.Warn("Error while scanning rows for short jobs")
return nil, err
}
if id.Valid {
stats[id.String].ShortJobs = int(shortJobs.Int64)
}
}
if col == "job.user" {
for id := range stats {
emptyDash := "-"
user := auth.GetUser(ctx)
name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
if name != "" {
stats[id].Name = &name
} else {
stats[id].Name = &emptyDash
}
}
}
}
// Calculating the histogram data is expensive, so only do it if needed.
// An explicit resolver cannot be used because we need to know the filters.
histogramsNeeded := false
fields := graphql.CollectFieldsCtx(ctx, nil)
for _, col := range fields {
if col.Name == "histDuration" || col.Name == "histNumNodes" {
histogramsNeeded = true
}
}
res := make([]*model.JobsStatistics, 0, len(stats))
for _, stat := range stats {
res = append(res, stat)
id, col := "", ""
if groupBy != nil {
id = stat.ID
col = groupBy2column[*groupBy]
}
if histogramsNeeded {
var err error
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter, id, col)
if err != nil {
log.Warn("Error while loading job statistics histogram: running jobs")
return nil, err
}
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
if err != nil {
log.Warn("Error while loading job statistics histogram: num nodes")
return nil, err
}
}
}
log.Infof("Timer JobStatistics %s", time.Since(start))
return res, nil
}
// `value` must be the expression to group by, aliased to "value". `id` and `col` can optionally be used
// to add a condition to the query of the kind "<col> = <id>".
func (r *JobRepository) jobsStatisticsHistogram(ctx context.Context,
value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
start := time.Now()
query, qerr := SecurityCheck(ctx, sq.Select(value, "COUNT(job.id) AS count").From("job"))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
query = BuildWhereClause(f, query)
}
if len(id) != 0 && len(col) != 0 {
query = query.Where(col+" = ?", id)
}
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
points := make([]*model.HistoPoint, 0)
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
points = append(points, &point)
}
log.Infof("Timer jobsStatisticsHistogram %s", time.Since(start))
return points, nil
}
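// Illustrative sketch, not part of the diff: the shape of SQL this helper
// emits for a num-nodes histogram of a plain user. The WHERE clause is an
// assumption based on what SecurityCheck adds for the RoleUser case.
const exampleHistogramSQL = `
SELECT job.num_nodes as value, COUNT(job.id) AS count
FROM job
WHERE job.user = 'hpcuser1'
GROUP BY value;`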

View File

@@ -8,10 +8,12 @@ import (
"fmt"
"testing"
"github.com/ClusterCockpit/cc-backend/pkg/log"
_ "github.com/mattn/go-sqlite3"
)
func init() {
log.Init("info", true)
Connect("sqlite3", "../../test/test.db")
}

View File

@@ -0,0 +1,113 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"database/sql"
"embed"
"fmt"
"os"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/golang-migrate/migrate/v4"
"github.com/golang-migrate/migrate/v4/database/mysql"
"github.com/golang-migrate/migrate/v4/database/sqlite3"
"github.com/golang-migrate/migrate/v4/source/iofs"
)
const supportedVersion uint = 3
//go:embed migrations/*
var migrationFiles embed.FS
func checkDBVersion(backend string, db *sql.DB) {
var m *migrate.Migrate
if backend == "sqlite3" {
driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
if err != nil {
log.Fatal(err)
}
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithInstance("iofs", d, "sqlite3", driver)
if err != nil {
log.Fatal(err)
}
} else if backend == "mysql" {
driver, err := mysql.WithInstance(db, &mysql.Config{})
if err != nil {
log.Fatal(err)
}
d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithInstance("iofs", d, "mysql", driver)
if err != nil {
log.Fatal(err)
}
} else {
log.Fatalf("unsupported database backend: %s", backend)
}
v, _, err := m.Version()
if err != nil {
if err == migrate.ErrNilVersion {
log.Warn("Legacy database without version or missing database file!")
} else {
log.Fatal(err)
}
}
if v < supportedVersion {
log.Warnf("Unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend --migrate-db", v, supportedVersion)
os.Exit(0)
}
if v > supportedVersion {
log.Warnf("Unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool!", v, supportedVersion)
os.Exit(0)
}
}
func MigrateDB(backend string, db string) {
var m *migrate.Migrate
if backend == "sqlite3" {
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
if err != nil {
log.Fatal(err)
}
} else if backend == "mysql" {
d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil {
log.Fatal(err)
}
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
if err != nil {
log.Fatal(err)
}
} else {
log.Fatalf("unsupported database backend: %s", backend)
}
if err := m.Up(); err != nil {
if err == migrate.ErrNoChange {
log.Info("DB already up to date!")
} else {
log.Fatal(err)
}
}
m.Close()
}
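// Illustrative sketch, not part of the diff: calling the migration entry
// point from main(), e.g. behind a --migrate-db flag. Paths and DSNs are
// hypothetical examples.
func exampleMigrate() {
	MigrateDB("sqlite3", "./var/job.db")
	// or, for MySQL/MariaDB:
	// MigrateDB("mysql", "user:password@tcp(localhost:3306)/ClusterCockpit")
}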

View File

@@ -0,0 +1,5 @@
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tag;
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS configuration;
DROP TABLE IF EXISTS user;

View File

@@ -0,0 +1,62 @@
CREATE TABLE IF NOT EXISTS job (
id INTEGER AUTO_INCREMENT PRIMARY KEY,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
`partition` VARCHAR(255) NOT NULL,
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1)),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
CREATE TABLE IF NOT EXISTS tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
CREATE TABLE IF NOT EXISTS jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
CREATE TABLE IF NOT EXISTS user (
username varchar(255) PRIMARY KEY NOT NULL,
password varchar(255) DEFAULT NULL,
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
name varchar(255) DEFAULT NULL,
roles varchar(255) NOT NULL DEFAULT "[]",
email varchar(255) DEFAULT NULL);

View File

@@ -0,0 +1,5 @@
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_by_state;

View File

@@ -0,0 +1,5 @@
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
CREATE INDEX IF NOT EXISTS job_by_state ON job (job_state);

View File

@@ -0,0 +1 @@
ALTER TABLE user DROP COLUMN projects;

View File

@@ -0,0 +1 @@
ALTER TABLE user ADD COLUMN projects varchar(255) NOT NULL DEFAULT "[]";

View File

@@ -0,0 +1,5 @@
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tag;
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS configuration;
DROP TABLE IF EXISTS user;

View File

@@ -0,0 +1,62 @@
CREATE TABLE IF NOT EXISTS job (
id INTEGER PRIMARY KEY,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
partition VARCHAR(255) NOT NULL,
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1)),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
CREATE TABLE IF NOT EXISTS tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
CREATE TABLE IF NOT EXISTS jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
CREATE TABLE IF NOT EXISTS user (
username varchar(255) PRIMARY KEY NOT NULL,
password varchar(255) DEFAULT NULL,
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
name varchar(255) DEFAULT NULL,
roles varchar(255) NOT NULL DEFAULT "[]",
email varchar(255) DEFAULT NULL);

View File

@@ -0,0 +1,5 @@
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_by_state;

View File

@@ -0,0 +1,5 @@
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
CREATE INDEX IF NOT EXISTS job_by_state ON job (job_state);

View File

@@ -0,0 +1 @@
ALTER TABLE user DROP COLUMN projects;

View File

@@ -0,0 +1 @@
ALTER TABLE user ADD COLUMN projects varchar(255) NOT NULL DEFAULT "[]";

View File

@@ -26,8 +26,11 @@ func (r *JobRepository) QueryJobs(
page *model.PageRequest,
order *model.OrderByInput) ([]*schema.Job, error) {
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
if qerr != nil {
return nil, qerr
}
if order != nil {
field := toSnakeCase(order.Field)
@@ -36,7 +39,7 @@ func (r *JobRepository) QueryJobs(
} else if order.Order == model.SortDirectionEnumDesc {
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
} else {
return nil, errors.New("invalid sorting order")
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
}
}
@@ -51,12 +54,14 @@ func (r *JobRepository) QueryJobs(
sql, args, err := query.ToSql()
if err != nil {
log.Warn("Error while converting query to sql")
return nil, err
}
log.Debugf("SQL query: `%s`, args: %#v", sql, args)
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
@@ -65,6 +70,7 @@ func (r *JobRepository) QueryJobs(
job, err := scanJob(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
@@ -79,8 +85,12 @@ func (r *JobRepository) CountJobs(
filters []*model.JobFilter) (int, error) {
// count all jobs:
query := sq.Select("count(*)").From("job")
query = SecurityCheck(ctx, query)
query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
if qerr != nil {
return 0, qerr
}
for _, f := range filters {
query = BuildWhereClause(f, query)
}
@@ -92,13 +102,23 @@ func (r *JobRepository) CountJobs(
return count, nil
}
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (queryOut sq.SelectBuilder, err error) {
user := auth.GetUser(ctx)
if user == nil || user.HasAnyRole([]auth.Role{auth.RoleAdmin, auth.RoleSupport, auth.RoleApi}) { // Admin & Co. : All jobs
return query, nil
} else if user.HasRole(auth.RoleManager) { // Manager : Add filter for managed projects' jobs only + personal jobs
if len(user.Projects) != 0 {
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.user": user.Username}}), nil
} else {
log.Infof("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
return query.Where("job.user = ?", user.Username), nil
}
} else if user.HasRole(auth.RoleUser) { // User : Only personal jobs
return query.Where("job.user = ?", user.Username), nil
} else { // Unauthorized : Error
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("User '%s' with unknown roles! [%#v]", user.Username, user.Roles)
}
return query.Where("job.user = ?", user.Username)
}
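// Illustrative sketch, not part of the diff: the calling pattern repository
// methods follow now that SecurityCheck returns an error. The cluster filter
// is a hypothetical example.
func exampleSecuredCount(ctx context.Context, r *JobRepository) (int, error) {
	query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
	if qerr != nil {
		return 0, qerr // e.g. a user with unknown roles
	}
	var count int
	if err := query.Where("job.cluster = ?", "testcluster").
		RunWith(r.DB).QueryRow().Scan(&count); err != nil {
		return 0, err
	}
	return count, nil
}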
// Build a sq.SelectBuilder out of a schema.JobFilter.
@@ -118,6 +138,9 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
if filter.Project != nil {
query = buildStringCondition("job.project", filter.Project, query)
}
if filter.JobName != nil {
query = buildStringCondition("job.meta_data", filter.JobName, query)
}
if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query)
}
@@ -200,6 +223,13 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
if cond.Contains != nil {
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
}
if cond.In != nil {
// The In filter is only used for username lists, hence the fixed job.user column.
queryUsers := make([]string, len(cond.In))
copy(queryUsers, cond.In)
return query.Where(sq.Or{sq.Eq{"job.user": queryUsers}})
}
return query
}
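// Illustrative summary, not part of the diff: the WHERE clauses the
// StringInput variants translate to in buildStringCondition (some variants
// are elided from this hunk; values are hypothetical):
//
//   {Eq: "vlsi"}       -> field = 'vlsi'
//   {StartsWith: "ab"} -> field LIKE 'ab%'
//   {Contains: "sim"}  -> field LIKE '%sim%'
//   {In: ["u1","u2"]}  -> job.user IN ('u1','u2')  (fixed to job.user)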
@@ -209,7 +239,7 @@ var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
func toSnakeCase(str string) string {
for _, c := range str {
if c == '\'' || c == '\\' {
panic("A hacker (probably not)!!!")
log.Panic("toSnakeCase() attack vector!")
}
}

View File

@@ -5,7 +5,11 @@
package repository
import (
"strings"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
@@ -13,16 +17,19 @@ import (
// Add the tag with id `tagId` to the job with the database id `jobId`.
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
if _, err := r.stmtCache.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES ($1, $2)`, job, tag); err != nil {
log.Error("Error while running query")
return nil, err
}
j, err := r.FindById(job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
tags, err := r.GetTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
@@ -32,16 +39,19 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
// Removes a tag from a job
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
if _, err := r.stmtCache.Exec("DELETE FROM jobtag WHERE jobtag.job_id = $1 AND jobtag.tag_id = $2", job, tag); err != nil {
log.Error("Error while running query")
return nil, err
}
j, err := r.FindById(job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
tags, err := r.GetTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
@@ -58,7 +68,7 @@ func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64,
return res.LastInsertId()
}
func (r *JobRepository) CountTags(user *string) (tags []schema.Tag, counts map[string]int, err error) {
func (r *JobRepository) CountTags(user *auth.User) (tags []schema.Tag, counts map[string]int, err error) {
tags = make([]schema.Tag, 0, 100)
xrows, err := r.DB.Queryx("SELECT * FROM tag")
if err != nil {
@@ -77,9 +87,13 @@ func (r *JobRepository) CountTags(user *string) (tags []schema.Tag, counts map[s
From("tag t").
LeftJoin("jobtag jt ON t.id = jt.tag_id").
GroupBy("t.tag_name")
if user != nil && user.HasRole(auth.RoleUser) { // USER: Only count own jobs
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ?)", user.Username)
} else if user != nil && user.HasRole(auth.RoleManager) { // MANAGER: Count own jobs plus project's jobs
// Build ("project1", "project2", ...) list of variable length directly in SQL string
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
} // else: ADMIN || SUPPORT: Count all jobs
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
@@ -138,6 +152,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
@@ -145,6 +160,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
tags = append(tags, tag)

View File

@@ -6,12 +6,12 @@ package repository
import (
"encoding/json"
"log"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/jmoiron/sqlx"
)
@@ -33,21 +33,9 @@ func GetUserCfgRepo() *UserCfgRepo {
userCfgRepoOnce.Do(func() {
db := GetConnection()
lookupConfigStmt, err := db.DB.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
if err != nil {
log.Fatalf("db.DB.Preparex() error: %v", err)
}
userCfgRepoInstance = &UserCfgRepo{
@@ -75,13 +63,14 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
}
data := uCfg.cache.Get(user.Username, func() (interface{}, time.Duration, int) {
uiconfig := make(map[string]interface{}, len(uCfg.uiDefaults))
for k, v := range uCfg.uiDefaults {
uiconfig[k] = v
}
rows, err := uCfg.Lookup.Query(user.Username)
if err != nil {
log.Warnf("Error while looking up user uiconfig for user '%v'", user.Username)
return err, 0, 0
}
@@ -90,22 +79,28 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
for rows.Next() {
var key, rawval string
if err := rows.Scan(&key, &rawval); err != nil {
log.Warn("Error while scanning user uiconfig values")
return err, 0, 0
}
var val interface{}
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
log.Warn("Error while unmarshaling raw user uiconfig json")
return err, 0, 0
}
size += len(key)
size += len(rawval)
uiconfig[key] = val
}
// Add global ShortRunningJobsDuration setting as plot_list_hideShortRunningJobs
uiconfig["plot_list_hideShortRunningJobs"] = config.Keys.ShortRunningJobsDuration
return uiconfig, 24 * time.Hour, size
})
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
@@ -122,6 +117,7 @@ func (uCfg *UserCfgRepo) UpdateConfig(
if user == nil {
var val interface{}
if err := json.Unmarshal([]byte(value), &val); err != nil {
log.Warn("Error while unmarshaling raw user config json")
return err
}
@@ -131,8 +127,8 @@ func (uCfg *UserCfgRepo) UpdateConfig(
return nil
}
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`, user.Username, key, value); err != nil {
log.Warnf("Error while replacing user config in DB for user '%v'", user.Username)
return err
}

View File

@@ -12,8 +12,9 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -44,7 +45,7 @@ var routes []Route = []Route{
{"/monitoring/user/{id}", "monitoring/user.tmpl", "User <ID> - ClusterCockpit", true, setupUserRoute},
{"/monitoring/systems/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> - ClusterCockpit", false, setupClusterRoute},
{"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute},
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analaysis - ClusterCockpit", true, setupAnalysisRoute},
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analysis - ClusterCockpit", true, setupAnalysisRoute},
{"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterRoute},
}
@@ -61,21 +62,21 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
State: []schema.JobState{schema.JobStateRunning},
}}, nil, nil)
if err != nil {
log.Errorf("failed to count jobs: %s", err.Error())
log.Warnf("failed to count jobs: %s", err.Error())
runningJobs = map[string]int{}
}
totalJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, nil, nil, nil)
if err != nil {
log.Errorf("failed to count jobs: %s", err.Error())
log.Warnf("failed to count jobs: %s", err.Error())
totalJobs = map[string]int{}
}
from := time.Now().Add(-24 * time.Hour)
recentShortJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{
StartTime: &schema.TimeRange{From: &from, To: nil},
Duration: &schema.IntRange{From: 0, To: graph.ShortJobDuration},
Duration: &schema.IntRange{From: 0, To: config.Keys.ShortRunningJobsDuration},
}}, nil, nil)
if err != nil {
log.Errorf("failed to count jobs: %s", err.Error())
log.Warnf("failed to count jobs: %s", err.Error())
recentShortJobs = map[string]int{}
}
@@ -103,6 +104,7 @@ func setupUserRoute(i InfoType, r *http.Request) InfoType {
username := mux.Vars(r)["id"]
i["id"] = username
i["username"] = username
// TODO: If forbidden (== err exists), redirect to error page
if user, _ := auth.FetchUser(r.Context(), jobRepo.DB, username); user != nil {
i["name"] = user.Name
i["email"] = user.Email
@@ -141,16 +143,13 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType {
}
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
jobRepo := repository.GetJobRepository()
user := auth.GetUser(r.Context())
tags, counts, err := jobRepo.CountTags(user)
tagMap := make(map[string][]map[string]interface{})
if err != nil {
log.Errorf("GetTags failed: %s", err.Error())
log.Warnf("GetTags failed: %s", err.Error())
i["tagmap"] = tagMap
return i
}
@@ -180,9 +179,17 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
filterPresets["project"] = query.Get("project")
filterPresets["projectMatch"] = "eq"
}
if query.Get("user") != "" {
filterPresets["user"] = query.Get("user")
filterPresets["userMatch"] = "eq"
if query.Get("jobName") != "" {
filterPresets["jobName"] = query.Get("jobName")
}
if len(query["user"]) != 0 {
if len(query["user"]) == 1 {
filterPresets["user"] = query.Get("user")
filterPresets["userMatch"] = "contains"
} else {
filterPresets["user"] = query["user"]
filterPresets["userMatch"] = "in"
}
}
if len(query["state"]) != 0 {
filterPresets["state"] = query["state"]
@@ -270,17 +277,15 @@ func SetupRoutes(router *mux.Router, version string, hash string, buildTime stri
title = strings.Replace(route.Title, "<ID>", id.(string), 1)
}
// Get User -> What if NIL?
user := auth.GetUser(r.Context())
// Get Roles
availableRoles, _ := auth.GetValidRolesMap(user)
page := web.Page{
Title: title,
User: *user,
Roles: availableRoles,
Build: web.Build{Version: version, Hash: hash, Buildtime: buildTime},
Config: conf,
Infos: infos,
@@ -294,3 +299,67 @@ func SetupRoutes(router *mux.Router, version string, hash string, buildTime stri
})
}
}
func HandleSearchBar(rw http.ResponseWriter, r *http.Request, api *api.RestApi) {
if search := r.URL.Query().Get("searchId"); search != "" {
user := auth.GetUser(r.Context())
splitSearch := strings.Split(search, ":")
if len(splitSearch) == 2 {
switch strings.Trim(splitSearch[0], " ") {
case "jobId":
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // All Users: Redirect to Tablequery
case "jobName":
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // All Users: Redirect to Tablequery
case "projectId":
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect) // All Users: Redirect to Tablequery
case "username":
if user.HasAnyRole([]auth.Role{auth.RoleAdmin, auth.RoleSupport, auth.RoleManager}) {
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusTemporaryRedirect)
} else {
http.Redirect(rw, r, "/monitoring/jobs/?", http.StatusTemporaryRedirect) // Users: Redirect to Tablequery
}
case "name":
usernames, _ := api.JobRepository.FindColumnValues(user, strings.Trim(splitSearch[1], " "), "user", "username", "name")
if len(usernames) != 0 {
joinedNames := strings.Join(usernames, "&user=")
http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames, http.StatusTemporaryRedirect)
} else {
if user.HasAnyRole([]auth.Role{auth.RoleAdmin, auth.RoleSupport, auth.RoleManager}) {
http.Redirect(rw, r, "/monitoring/users/?user=NoUserNameFound", http.StatusTemporaryRedirect)
} else {
http.Redirect(rw, r, "/monitoring/jobs/?", http.StatusTemporaryRedirect) // Users: Redirect to Tablequery
}
}
default:
log.Warnf("Searchbar type parameter '%s' unknown", strings.Trim(splitSearch[0], " "))
http.Redirect(rw, r, "/monitoring/jobs/?", http.StatusTemporaryRedirect) // Unknown: Redirect to Tablequery
}
} else if len(splitSearch) == 1 {
username, project, jobname, err := api.JobRepository.FindUserOrProjectOrJobname(r.Context(), strings.Trim(search, " "))
if err != nil {
log.Errorf("Error while searchbar best guess: %v", err.Error())
http.Redirect(rw, r, "/monitoring/jobs/?", http.StatusTemporaryRedirect) // Unknown: Redirect to Tablequery
return
}
if username != "" {
http.Redirect(rw, r, "/monitoring/user/"+username, http.StatusTemporaryRedirect) // User: Redirect to user page
} else if project != "" {
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // projectId (equal)
} else if jobname != "" {
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // JobName (contains)
} else {
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(search, " ")), http.StatusTemporaryRedirect) // No Result: Probably jobId
}
} else {
log.Warnf("Searchbar query parameters malformed: %v", search)
http.Redirect(rw, r, "/monitoring/jobs/?", http.StatusTemporaryRedirect) // Unknown: Redirect to Tablequery
}
} else {
http.Redirect(rw, r, "/monitoring/jobs/?", http.StatusTemporaryRedirect)
}
}
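// Illustrative summary, not part of the diff: searchId formats accepted by
// HandleSearchBar above (example values are hypothetical):
//
//   jobId:123         -> /monitoring/jobs/?jobId=123
//   jobName:lammps    -> /monitoring/jobs/?jobName=lammps
//   projectId:vlsi    -> /monitoring/jobs/?projectMatch=eq&project=vlsi
//   username:hpcuser1 -> /monitoring/users/?user=hpcuser1   (privileged roles only)
//   name:Smith        -> /monitoring/users/?user=<resolved>  (via FindColumnValues)
//   lammps            -> best-guess resolution via FindUserOrProjectOrJobname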

View File

@@ -14,6 +14,8 @@ import (
"strconv"
"strings"
"syscall"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
// Very simple and limited .env file reader.
@@ -22,6 +24,7 @@ import (
func LoadEnv(file string) error {
f, err := os.Open(file)
if err != nil {
log.Error("Error while opening file")
return err
}
@@ -40,14 +43,14 @@ func LoadEnv(file string) error {
line = strings.TrimPrefix(line, "export ")
parts := strings.SplitN(line, "=", 2)
if len(parts) != 2 {
return fmt.Errorf("unsupported line: %#v", line)
return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
}
key := strings.TrimSpace(parts[0])
val := strings.TrimSpace(parts[1])
if strings.HasPrefix(val, "\"") {
if !strings.HasSuffix(val, "\"") {
return fmt.Errorf("unsupported line: %#v", line)
return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
}
runes := []rune(val[1 : len(val)-1])
@@ -65,7 +68,7 @@ func LoadEnv(file string) error {
case '"':
sb.WriteRune('"')
default:
return fmt.Errorf("unsupprorted escape sequence in quoted string: backslash %#v", runes[i])
return fmt.Errorf("RUNTIME/SETUP > unsupported escape sequence in quoted string: backslash %#v", runes[i])
}
continue
}
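// Illustrative sketch, not part of the diff: a .env file the reader above
// accepts. Keys and values are hypothetical; quoted values may use the
// escape sequences handled in the loop above.
//
//   export SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
//   LDAP_ADMIN_PASSWORD="secret\"pass"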
@@ -89,11 +92,13 @@ func DropPrivileges(username string, group string) error {
if group != "" {
g, err := user.LookupGroup(group)
if err != nil {
log.Warn("Error while looking up group")
return err
}
gid, _ := strconv.Atoi(g.Gid)
if err := syscall.Setgid(gid); err != nil {
log.Warn("Error while setting gid")
return err
}
}
@@ -101,11 +106,13 @@ func DropPrivileges(username string, group string) error {
if username != "" {
u, err := user.Lookup(username)
if err != nil {
log.Warn("Error while looking up user")
return err
}
uid, _ := strconv.Atoi(u.Uid)
if err := syscall.Setuid(uid); err != nil {
log.Warn("Error while setting uid")
return err
}
}
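// Illustrative sketch, not part of the diff: typical use of DropPrivileges
// when the server is started as root to bind a privileged port. The account
// and group names are hypothetical.
func exampleDropAfterBind() error {
	// listeners on :80/:443 must be created before this call
	return DropPrivileges("clustercockpit", "clustercockpit")
}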