Merge branch 'master' into 40_45_82_update_roles

This commit is contained in:
Jan Eitzinger 2023-04-07 08:19:04 +02:00 committed by GitHub
commit d420b8b666
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 78 additions and 117 deletions

View File

@ -24,7 +24,7 @@ SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
$(wildcard $(FRONTEND)/src/plots/*.svelte) \
$(wildcard $(FRONTEND)/src/joblist/*.svelte)
.PHONY: clean test $(TARGET)
.PHONY: clean test tags $(TARGET)
.NOTPARALLEL:
@ -43,6 +43,10 @@ test:
@go vet ./...
@go test ./...
tags:
$(info ===> TAGS)
@ctags -R
$(VAR):
@mkdir $(VAR)
cd web/frontend && yarn install

View File

@ -97,6 +97,12 @@
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
@ -726,13 +732,6 @@
},
"jobState": {
"description": "Final job state",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout"
],
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
@ -790,14 +789,6 @@
},
"jobState": {
"description": "Final state of job",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
],
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
@ -926,14 +917,6 @@
},
"jobState": {
"description": "Final state of job",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
],
"allOf": [
{
"$ref": "#/definitions/schema.JobState"

View File

@ -76,12 +76,6 @@ definitions:
allOf:
- $ref: '#/definitions/schema.JobState'
description: Final job state
enum:
- completed
- failed
- cancelled
- stopped
- timeout
example: completed
startTime:
description: Start Time of job as epoch
@ -130,13 +124,6 @@ definitions:
allOf:
- $ref: '#/definitions/schema.JobState'
description: Final state of job
enum:
- completed
- failed
- cancelled
- stopped
- timeout
- out_of_memory
example: completed
metaData:
additionalProperties:
@ -239,13 +226,6 @@ definitions:
allOf:
- $ref: '#/definitions/schema.JobState'
description: Final state of job
enum:
- completed
- failed
- cancelled
- stopped
- timeout
- out_of_memory
example: completed
metaData:
additionalProperties:
@ -463,6 +443,10 @@ paths:
description: Unauthorized
schema:
$ref: '#/definitions/api.ErrorResponse'
"403":
description: Forbidden
schema:
$ref: '#/definitions/api.ErrorResponse'
"500":
description: Internal Server Error
schema:

View File

@ -24,7 +24,8 @@ It is supported to specify these by means of an `.env` file located in the proje
* `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates.
* `redirect-http-to`: Type string. If not the empty string and `addr` does not end in ":80", redirect every request incoming at port 80 to that url.
* `machine-state-dir`: Type string. Where to store MachineState files. TODO: Explain in more detail!
* `"stop-jobs-exceeding-walltime`: Type int. If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job. Default `0`;
* `stop-jobs-exceeding-walltime`: Type int. If not zero, automatically mark jobs as stopped once they have been running X seconds longer than their walltime. Only applies if walltime is set for job. Default `0`.
* `short-running-jobs-duration`: Type int. Do not show running jobs shorter than X seconds. Default `300`.
* `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`.
- `url`: Type string. URL of LDAP directory server.
- `user_base`: Type string. Base DN of user tree root.
@ -54,7 +55,6 @@ It is supported to specify these by means of an `.env` file located in the proje
- `plot_general_colorBackground`: Type bool. Color plot background according to job average threshold limits. Default `true`.
- `plot_general_colorscheme`: Type string array. Initial color scheme. Default `"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"`.
- `plot_general_lineWidth`: Type int. Initial linewidth. Default `3`.
- `plot_list_hideShortRunningJobs`: Type int. Do not show running jobs shorter than X seconds. Default `300`.
- `plot_list_jobsPerPage`: Type int. Jobs shown per page in job lists. Default `50`.
- `plot_list_selectedMetrics`: Type string array. Initial metric plots shown in jobs lists. Default `"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"`.
- `plot_view_plotsPerRow`: Type int. Number of plots per row in single job view. Default `3`.

View File

@ -35,5 +35,6 @@
"forceJWTValidationViaDatabase": false,
"max-age": 0,
"trustedExternalIssuer": ""
}
},
"short-running-jobs-duration": 300
}

View File

@ -103,6 +103,12 @@ const docTemplate = `{
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
@ -732,13 +738,6 @@ const docTemplate = `{
},
"jobState": {
"description": "Final job state",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout"
],
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
@ -796,14 +795,6 @@ const docTemplate = `{
},
"jobState": {
"description": "Final state of job",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
],
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
@ -932,14 +923,6 @@ const docTemplate = `{
},
"jobState": {
"description": "Final state of job",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
],
"allOf": [
{
"$ref": "#/definitions/schema.JobState"

View File

@ -105,10 +105,10 @@ type DeleteJobApiResponse struct {
type StopJobApiRequest struct {
// Stop Time of job as epoch
StopTime int64 `json:"stopTime" validate:"required" example:"1649763839"`
State schema.JobState `json:"jobState" validate:"required" example:"completed" enums:"completed,failed,cancelled,stopped,timeout"` // Final job state
JobId *int64 `json:"jobId" example:"123000"` // Cluster Job ID of job
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
State schema.JobState `json:"jobState" validate:"required" example:"completed"` // Final job state
JobId *int64 `json:"jobId" example:"123000"` // Cluster Job ID of job
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
}
// DeleteJobApiRequest model

View File

@ -25,6 +25,7 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
LdapConfig: nil,
SessionMaxAge: "168h",
StopJobsExceedingWalltime: 0,
ShortRunningJobsDuration: 5 * 60,
UiDefaults: map[string]interface{}{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
@ -34,7 +35,6 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
"plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3,
"plot_list_hideShortRunningJobs": 5 * 60,
"plot_list_jobsPerPage": 50,
"plot_list_selectedMetrics": []string{"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"},
"plot_view_plotsPerRow": 3,

View File

@ -16,6 +16,7 @@ import (
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@ -671,9 +672,6 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
return nil
}
// TODO: Move to config
const ShortJobDuration int = 5 * 60
// GraphQL validation should make sure that no unknown values can be specified.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.user",
@ -767,7 +765,8 @@ func (r *JobRepository) JobsStatistics(ctx context.Context,
}
if groupBy == nil {
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < ?", config.Keys.ShortRunningJobsDuration)
query, qerr := SecurityCheck(ctx, query)
if qerr != nil {
@ -783,7 +782,8 @@ func (r *JobRepository) JobsStatistics(ctx context.Context,
}
} else {
col := groupBy2column[*groupBy]
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", ShortJobDuration)
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < ?", config.Keys.ShortRunningJobsDuration)
query, qerr := SecurityCheck(ctx, query)

View File

@ -11,8 +11,8 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/jmoiron/sqlx"
)
@ -63,14 +63,14 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
}
data := uCfg.cache.Get(user.Username, func() (interface{}, time.Duration, int) {
config := make(map[string]interface{}, len(uCfg.uiDefaults))
uiconfig := make(map[string]interface{}, len(uCfg.uiDefaults))
for k, v := range uCfg.uiDefaults {
config[k] = v
uiconfig[k] = v
}
rows, err := uCfg.Lookup.Query(user.Username)
if err != nil {
log.Warnf("Error while looking up user config for user '%v'", user.Username)
log.Warnf("Error while looking up user uiconfig for user '%v'", user.Username)
return err, 0, 0
}
@ -79,22 +79,25 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
for rows.Next() {
var key, rawval string
if err := rows.Scan(&key, &rawval); err != nil {
log.Warn("Error while scanning user config values")
log.Warn("Error while scanning user uiconfig values")
return err, 0, 0
}
var val interface{}
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
log.Warn("Error while unmarshaling raw user config json")
log.Warn("Error while unmarshaling raw user uiconfig json")
return err, 0, 0
}
size += len(key)
size += len(rawval)
config[key] = val
uiconfig[key] = val
}
return config, 24 * time.Hour, size
// Add global ShortRunningJobsDuration setting as plot_list_hideShortRunningJobs
uiconfig["plot_list_hideShortRunningJobs"] = config.Keys.ShortRunningJobsDuration
return uiconfig, 24 * time.Hour, size
})
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
@ -124,8 +127,8 @@ func (uCfg *UserCfgRepo) UpdateConfig(
return nil
}
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`, user, key, value); err != nil {
log.Warnf("Error while replacing user config in DB for user '%v'", user)
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`, user.Username, key, value); err != nil {
log.Warnf("Error while replacing user config in DB for user '%v'", user.Username)
return err
}

View File

@ -14,6 +14,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@ -72,7 +73,7 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
from := time.Now().Add(-24 * time.Hour)
recentShortJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{
StartTime: &schema.TimeRange{From: &from, To: nil},
Duration: &schema.IntRange{From: 0, To: repository.ShortJobDuration},
Duration: &schema.IntRange{From: 0, To: config.Keys.ShortRunningJobsDuration},
}}, nil, nil)
if err != nil {
log.Warnf("failed to count jobs: %s", err.Error())

View File

@ -116,6 +116,9 @@ type ProgramConfig struct {
// If not zero, automatically mark jobs as stopped running X seconds longer than their walltime.
StopJobsExceedingWalltime int `json:"stop-jobs-exceeding-walltime"`
// Defines time X in seconds in which jobs are considered to be "short" and will be filtered in specific views.
ShortRunningJobsDuration int `json:"short-running-jobs-duration"`
// Array of Clusters
Clusters []*ClusterConfig `json:"clusters"`
}

View File

@ -17,26 +17,26 @@ import (
type BaseJob struct {
// The unique identifier of a job
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
User string `json:"user" db:"user" example:"abcd100h"` // The unique identifier of a user
Project string `json:"project" db:"project" example:"abcd200"` // The unique identifier of a project
Cluster string `json:"cluster" db:"cluster" example:"fritz"` // The unique identifier of a cluster
SubCluster string `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster
Partition string `json:"partition" db:"partition" example:"main"` // The Slurm partition to which the job was submitted
ArrayJobId int64 `json:"arrayJobId" db:"array_job_id" example:"123000"` // The unique identifier of an array job
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` // Number of nodes used (Min > 0)
NumHWThreads int32 `json:"numHwthreads" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
NumAcc int32 `json:"numAcc" db:"num_acc" example:"2" minimum:"1"` // Number of accelerators used (Min > 0)
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
MonitoringStatus int32 `json:"monitoringStatus" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull
SMT int32 `json:"smt" db:"smt" example:"4"` // SMT threads used by job
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"` // Duration of job in seconds (Min > 0)
Walltime int64 `json:"walltime" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0)
Tags []*Tag `json:"tags"` // List of tags
RawResources []byte `json:"-" db:"resources"` // Resources used by job [As Bytes]
Resources []*Resource `json:"resources"` // Resources used by job
RawMetaData []byte `json:"-" db:"meta_data"` // Additional information about the job [As Bytes]
MetaData map[string]string `json:"metaData"` // Additional information about the job
User string `json:"user" db:"user" example:"abcd100h"` // The unique identifier of a user
Project string `json:"project" db:"project" example:"abcd200"` // The unique identifier of a project
Cluster string `json:"cluster" db:"cluster" example:"fritz"` // The unique identifier of a cluster
SubCluster string `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster
Partition string `json:"partition" db:"partition" example:"main"` // The Slurm partition to which the job was submitted
ArrayJobId int64 `json:"arrayJobId" db:"array_job_id" example:"123000"` // The unique identifier of an array job
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` // Number of nodes used (Min > 0)
NumHWThreads int32 `json:"numHwthreads" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
NumAcc int32 `json:"numAcc" db:"num_acc" example:"2" minimum:"1"` // Number of accelerators used (Min > 0)
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
MonitoringStatus int32 `json:"monitoringStatus" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull
SMT int32 `json:"smt" db:"smt" example:"4"` // SMT threads used by job
State JobState `json:"jobState" db:"job_state" example:"completed"` // Final state of job
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"` // Duration of job in seconds (Min > 0)
Walltime int64 `json:"walltime" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0)
Tags []*Tag `json:"tags"` // List of tags
RawResources []byte `json:"-" db:"resources"` // Resources used by job [As Bytes]
Resources []*Resource `json:"resources"` // Resources used by job
RawMetaData []byte `json:"-" db:"meta_data"` // Additional information about the job [As Bytes]
MetaData map[string]string `json:"metaData"` // Additional information about the job
}
// Non-Swaggered Comment: Job

View File

@ -76,6 +76,10 @@
"description": "If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job.",
"type": "integer"
},
"short-running-jobs-duration": {
"description": "Do not show running jobs shorter than X seconds.",
"type": "integer"
},
"": {
"description": "",
"type": "string"
@ -241,10 +245,6 @@
"description": "Jobs shown per page in job lists",
"type": "integer"
},
"plot_list_hideShortRunningJobs": {
"description": "Do not show running jobs shorter than X seconds",
"type": "integer"
},
"plot_view_plotsPerRow": {
"description": "Number of plots per row in single job view",
"type": "integer"
@ -342,8 +342,7 @@
"job_view_polarPlotMetrics",
"job_view_selectedMetrics",
"plot_general_colorscheme",
"plot_list_selectedMetrics",
"plot_list_hideShortRunningJobs"
"plot_list_selectedMetrics"
]
}
},