Merge branch 'hotfix' into add_detailed_nodelist

Christoph Kluge
2025-01-07 14:07:41 +01:00
93 changed files with 3383 additions and 3922 deletions

View File

@@ -14,9 +14,9 @@ import (
"os"
"path/filepath"
"reflect"
"strconv"
"strings"
"testing"
"time"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
@@ -120,7 +120,7 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
t.Fatal(err)
}
@@ -200,6 +200,10 @@ func TestRestApi(t *testing.T) {
r.StrictSlash(true)
restapi.MountApiRoutes(r)
var TestJobId int64 = 123
var TestClusterName string = "testcluster"
var TestStartTime int64 = 123456789
const startJobBody string = `{
"jobId": 123,
"user": "testuser",
@@ -225,7 +229,6 @@ func TestRestApi(t *testing.T) {
"startTime": 123456789
}`
var dbid int64
const contextUserKey repository.ContextKey = "user"
contextUserValue := &schema.User{
Username: "testuser",
@@ -246,14 +249,8 @@ func TestRestApi(t *testing.T) {
if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String())
}
var res api.StartJobApiResponse
if err := json.Unmarshal(recorder.Body.Bytes(), &res); err != nil {
t.Fatal(err)
}
resolver := graph.GetResolverInstance()
job, err := resolver.Query().Job(ctx, strconv.Itoa(int(res.DBID)))
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil {
t.Fatal(err)
}
@@ -285,8 +282,6 @@ func TestRestApi(t *testing.T) {
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
t.Fatalf("unexpected tags: %#v", job.Tags)
}
dbid = res.DBID
}); !ok {
return
}
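
Since start_job no longer returns the database id, the tests resolve the freshly created job through its natural key instead. A minimal sketch of that lookup plus a follow-up assertion, reusing the test constants declared above (the surrounding test harness is assumed):

job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil {
    t.Fatal(err) // sql.ErrNoRows here means start_job never inserted the row
}
if job.JobID != TestJobId {
    t.Errorf("expected jobId %d, got %d", TestJobId, job.JobID)
}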
@@ -314,8 +309,7 @@ func TestRestApi(t *testing.T) {
}
archiver.WaitForArchiving()
resolver := graph.GetResolverInstance()
job, err := resolver.Query().Job(ctx, strconv.Itoa(int(dbid)))
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil {
t.Fatal(err)
}
@@ -404,8 +398,10 @@ func TestRestApi(t *testing.T) {
t.Fatal("subtest failed")
}
time.Sleep(1 * time.Second)
const stopJobBodyFailed string = `{
"jobId": 12345,
"jobId": 12345,
"cluster": "testcluster",
"jobState": "failed",

View File

@@ -601,88 +601,6 @@ const docTemplate = `{
}
}
},
"/jobs/stop_job/{id}": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to stop is specified by database ID. Only stopTime and final state are required in request body.\nReturns full job resource information according to 'JobMeta' scheme.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"Job add and modify"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
{
"type": "integer",
"description": "Database ID of Job",
"name": "id",
"in": "path",
"required": true
},
{
"description": "stopTime and final state in request body",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.StopJobApiRequest"
}
}
],
"responses": {
"200": {
"description": "Job resource",
"schema": {
"$ref": "#/definitions/schema.JobMeta"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Resource not found",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/tag_job/{id}": {
"post": {
"security": [
@@ -690,7 +608,7 @@ const docTemplate = `{
"ApiKeyAuth": []
}
],
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nTag Scope for frontend visibility will default to \"global\" if none entered, other options: \"admin\" or specific username.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
"consumes": [
"application/json"
],
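
With the scope field now documented, a tag_job request can pin a tag's frontend visibility to "global", "admin", or a specific username. A hypothetical request body, written as a Go raw string in the style of the test fixtures:

// Hypothetical payload for POST /jobs/tag_job/{id}; the handler iterates a slice.
const tagJobBody string = `[
    { "type": "testTagType", "name": "testTagName", "scope": "testuser" }
]`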
@@ -1283,6 +1201,11 @@ const docTemplate = `{
"type": "string",
"example": "Testjob"
},
"scope": {
"description": "Tag Scope for Frontend Display",
"type": "string",
"example": "global"
},
"type": {
"description": "Tag Type",
"type": "string",
@@ -1410,9 +1333,8 @@ const docTemplate = `{
"api.StartJobApiResponse": {
"type": "object",
"properties": {
"id": {
"description": "Database ID of new job",
"type": "integer"
"msg": {
"type": "string"
}
}
},
@@ -1424,17 +1346,14 @@ const docTemplate = `{
],
"properties": {
"cluster": {
"description": "Cluster of job",
"type": "string",
"example": "fritz"
},
"jobId": {
"description": "Cluster Job ID of job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final job state",
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
@@ -1443,12 +1362,10 @@ const docTemplate = `{
"example": "completed"
},
"startTime": {
"description": "Start Time of job as epoch",
"type": "integer",
"example": 1649723812
},
"stopTime": {
"description": "Stop Time of job as epoch",
"type": "integer",
"example": 1649763839
}
@@ -1493,12 +1410,10 @@ const docTemplate = `{
"type": "object",
"properties": {
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer",
"example": 123000
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string",
"example": "fritz"
},
@@ -1506,33 +1421,39 @@ const docTemplate = `{
"$ref": "#/definitions/schema.JobLinkResultList"
},
"duration": {
"description": "Duration of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 43200
},
"energy": {
"type": "number"
},
"energyFootprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"exclusive": {
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"flopsAnyAvg": {
"description": "FlopsAnyAvg as Float64",
"type": "number"
"footprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"id": {
"description": "The unique identifier of a job in the database",
"type": "integer"
},
"jobId": {
"description": "The unique identifier of a job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"enum": [
"completed",
"failed",
@@ -1548,95 +1469,69 @@ const docTemplate = `{
],
"example": "completed"
},
"loadAvg": {
"description": "LoadAvg as Float64",
"type": "number"
},
"memBwAvg": {
"description": "MemBwAvg as Float64",
"type": "number"
},
"memUsedMax": {
"description": "MemUsedMax as Float64",
"type": "number"
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"monitoringStatus": {
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
"type": "integer",
"maximum": 3,
"minimum": 0,
"example": 1
},
"numAcc": {
"description": "Number of accelerators used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"numHwthreads": {
"description": "NumCores int32 ` + "`" + `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` + "`" + ` // Number of HWThreads used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 20
},
"numNodes": {
"description": "Number of nodes used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string",
"example": "main"
},
"project": {
"description": "The unique identifier of a project",
"type": "string",
"example": "abcd200"
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"smt": {
"description": "SMT threads used by job",
"type": "integer",
"example": 4
},
"startTime": {
"description": "Start time as 'time.Time' data type",
"type": "string"
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string",
"example": "main"
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Tag"
}
},
"user": {
"description": "The unique identifier of a user",
"type": "string",
"example": "abcd100h"
},
"walltime": {
"description": "Requested walltime of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 86400
@@ -1673,12 +1568,10 @@ const docTemplate = `{
"type": "object",
"properties": {
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer",
"example": 123000
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string",
"example": "fritz"
},
@@ -1686,29 +1579,39 @@ const docTemplate = `{
"$ref": "#/definitions/schema.JobLinkResultList"
},
"duration": {
"description": "Duration of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 43200
},
"energy": {
"type": "number"
},
"energyFootprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"exclusive": {
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"id": {
"description": "The unique identifier of a job in the database",
"type": "integer"
},
"jobId": {
"description": "The unique identifier of a job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"enum": [
"completed",
"failed",
@@ -1725,91 +1628,76 @@ const docTemplate = `{
"example": "completed"
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"monitoringStatus": {
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
"type": "integer",
"maximum": 3,
"minimum": 0,
"example": 1
},
"numAcc": {
"description": "Number of accelerators used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"numHwthreads": {
"description": "NumCores int32 ` + "`" + `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` + "`" + ` // Number of HWThreads used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 20
},
"numNodes": {
"description": "Number of nodes used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string",
"example": "main"
},
"project": {
"description": "The unique identifier of a project",
"type": "string",
"example": "abcd200"
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"smt": {
"description": "SMT threads used by job",
"type": "integer",
"example": 4
},
"startTime": {
"description": "Start epoch time stamp in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 1649723812
},
"statistics": {
"description": "Metric statistics of job",
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/schema.JobStatistics"
}
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string",
"example": "main"
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Tag"
}
},
"user": {
"description": "The unique identifier of a user",
"type": "string",
"example": "abcd100h"
},
"walltime": {
"description": "Requested walltime of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 86400
@@ -1898,6 +1786,15 @@ const docTemplate = `{
"caution": {
"type": "number"
},
"energy": {
"type": "string"
},
"footprint": {
"type": "string"
},
"lowerIsBetter": {
"type": "boolean"
},
"name": {
"type": "string"
},
@@ -1975,22 +1872,18 @@ const docTemplate = `{
"type": "object",
"properties": {
"accelerators": {
"description": "List of of accelerator device ids",
"type": "array",
"items": {
"type": "string"
}
},
"configuration": {
"description": "The configuration options of the node",
"type": "string"
},
"hostname": {
"description": "Name of the host (= node)",
"type": "string"
},
"hwthreads": {
"description": "List of OS processor ids",
"type": "array",
"items": {
"type": "integer"
@@ -2033,6 +1926,12 @@ const docTemplate = `{
"type": "number"
}
},
"median": {
"type": "array",
"items": {
"type": "number"
}
},
"min": {
"type": "array",
"items": {
@@ -2056,15 +1955,33 @@ const docTemplate = `{
"coresPerSocket": {
"type": "integer"
},
"energyFootprint": {
"type": "array",
"items": {
"type": "string"
}
},
"flopRateScalar": {
"$ref": "#/definitions/schema.MetricValue"
},
"flopRateSimd": {
"$ref": "#/definitions/schema.MetricValue"
},
"footprint": {
"type": "array",
"items": {
"type": "string"
}
},
"memoryBandwidth": {
"$ref": "#/definitions/schema.MetricValue"
},
"metricConfig": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.MetricConfig"
}
},
"name": {
"type": "string"
},
@@ -2094,6 +2011,15 @@ const docTemplate = `{
"caution": {
"type": "number"
},
"energy": {
"type": "string"
},
"footprint": {
"type": "string"
},
"lowerIsBetter": {
"type": "boolean"
},
"name": {
"type": "string"
},
@@ -2113,16 +2039,17 @@ const docTemplate = `{
"type": "object",
"properties": {
"id": {
"description": "The unique DB identifier of a tag",
"type": "integer"
},
"name": {
"description": "Tag Name",
"type": "string",
"example": "Testjob"
},
"scope": {
"type": "string",
"example": "global"
},
"type": {
"description": "Tag Type",
"type": "string",
"example": "Debug"
}

View File

@@ -72,7 +72,6 @@ func (api *RestApi) MountApiRoutes(r *mux.Router) {
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/stop_job/{id}", api.stopJobById).Methods(http.MethodPost, http.MethodPut)
// r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
@@ -111,6 +110,7 @@ func (api *RestApi) MountConfigApiRoutes(r *mux.Router) {
r.HandleFunc("/users/", api.getUsers).Methods(http.MethodGet)
r.HandleFunc("/users/", api.deleteUser).Methods(http.MethodDelete)
r.HandleFunc("/user/{id}", api.updateUser).Methods(http.MethodPost)
r.HandleFunc("/notice/", api.editNotice).Methods(http.MethodPost)
}
}
@@ -123,19 +123,8 @@ func (api *RestApi) MountFrontendApiRoutes(r *mux.Router) {
}
}
// StartJobApiResponse model
type StartJobApiResponse struct {
// Database ID of new job
DBID int64 `json:"id"`
}
// DeleteJobApiResponse model
type DeleteJobApiResponse struct {
Message string `json:"msg"`
}
// UpdateUserApiResponse model
type UpdateUserApiResponse struct {
// DefaultApiResponse model
type DefaultJobApiResponse struct {
Message string `json:"msg"`
}
@@ -342,7 +331,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
withMetadata := false
filter := &model.JobFilter{}
page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc}
order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
for key, vals := range r.URL.Query() {
switch key {
@@ -421,7 +410,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
StartTime: job.StartTime.Unix(),
}
res.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
res.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
@@ -486,15 +475,15 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
job, err = api.JobRepository.FindById(r.Context(), id) // Get Job from Repo by ID
} else {
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
handleError(fmt.Errorf("the parameter 'id' is required"), http.StatusBadRequest, rw)
return
}
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
return
}
job.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
@@ -526,7 +515,7 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
if r.URL.Query().Get("all-metrics") == "true" {
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context(), resolution)
if err != nil {
log.Warn("Error while loading job data")
log.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
return
}
}
@@ -583,11 +572,11 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
return
}
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
return
}
job.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
@@ -622,7 +611,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context(), resolution)
if err != nil {
log.Warn("Error while loading job data")
log.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
return
}
@@ -728,7 +717,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
return
}
job.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
@@ -741,7 +730,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
}
for _, tag := range req {
tagId, err := api.JobRepository.AddTagOrCreate(r.Context(), job.ID, tag.Type, tag.Name, tag.Scope)
tagId, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), job.ID, tag.Type, tag.Name, tag.Scope)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
@@ -819,7 +808,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
unlockOnce.Do(api.RepositoryMutex.Unlock)
for _, tag := range req.Tags {
if _, err := api.JobRepository.AddTagOrCreate(r.Context(), id, tag.Type, tag.Name, tag.Scope); err != nil {
if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
return
@@ -829,61 +818,11 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusCreated)
json.NewEncoder(rw).Encode(StartJobApiResponse{
DBID: id,
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
Message: "success",
})
}
// stopJobById godoc
// @summary Marks job as completed and triggers archiving
// @tags Job add and modify
// @description Job to stop is specified by database ID. Only stopTime and final state are required in request body.
// @description Returns full job resource information according to 'JobMeta' scheme.
// @accept json
// @produce json
// @param id path int true "Database ID of Job"
// @param request body api.StopJobApiRequest true "stopTime and final state in request body"
// @success 200 {object} schema.JobMeta "Job resource"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 404 {object} api.ErrorResponse "Resource not found"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /jobs/stop_job/{id} [post]
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
// Parse request body: Only StopTime and State
req := StopJobApiRequest{}
if err := decode(r.Body, &req); err != nil {
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
return
}
// Fetch job (that will be stopped) from db
id, ok := mux.Vars(r)["id"]
var job *schema.Job
var err error
if ok {
id, e := strconv.ParseInt(id, 10, 64)
if e != nil {
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
return
}
job, err = api.JobRepository.FindById(r.Context(), id)
} else {
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
return
}
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
api.checkAndHandleStopJob(rw, job, req)
}
// stopJobByRequest godoc
// @summary Marks job as completed and triggers archiving
// @tags Job add and modify
@@ -916,6 +855,7 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
return
}
// log.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
@@ -962,7 +902,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
}
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
Message: fmt.Sprintf("Successfully deleted job %s", id),
})
}
@@ -1013,7 +953,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
})
}
@@ -1057,7 +997,7 @@ func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
})
}
@@ -1065,12 +1005,12 @@ func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
// Sanity checks
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != schema.JobStateRunning {
handleError(errors.New("stopTime must be larger than startTime and only running jobs can be stopped"), http.StatusBadRequest, rw)
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger than startTime %d and only running jobs can be stopped (state is: %s)", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix(), job.State), http.StatusBadRequest, rw)
return
}
if req.State != "" && !req.State.Valid() {
handleError(fmt.Errorf("invalid job state: %#v", req.State), http.StatusBadRequest, rw)
handleError(fmt.Errorf("jobId %d (id %d) on %s : invalid requested job state: %#v", job.JobID, job.ID, job.Cluster, req.State), http.StatusBadRequest, rw)
return
} else if req.State == "" {
req.State = schema.JobStateCompleted
@@ -1080,11 +1020,11 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
job.Duration = int32(req.StopTime - job.StartTime.Unix())
job.State = req.State
if err := api.JobRepository.Stop(job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
handleError(fmt.Errorf("marking job as stopped failed: %w", err), http.StatusInternalServerError, rw)
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
return
}
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime)
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
// Send a response (with status OK). This means that errors that happen from here on forward
// can *NOT* be communicated to the client. If reading from a MetricDataRepository or
@@ -1356,6 +1296,69 @@ func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
}
}
// editNotice godoc
// @summary Updates or empties the notice box content
// @tags User
// @description Modifies the content of notice.txt, shown as notice box on the homepage.
// @description If more than one formValue is set then only the highest priority field is used.
// @description Only accessible from IPs registered with apiAllowedIPs configuration option.
// @accept mpfd
// @produce plain
// @param new-content formData string false "Priority 1: New content to display"
// @success 200 {string} string "Success Response Message"
// @failure 400 {string} string "Bad Request"
// @failure 401 {string} string "Unauthorized"
// @failure 403 {string} string "Forbidden"
// @failure 422 {string} string "Unprocessable Entity: The user could not be updated"
// @failure 500 {string} string "Internal Server Error"
// @security ApiKeyAuth
// @router /notice/ [post]
func (api *RestApi) editNotice(rw http.ResponseWriter, r *http.Request) {
err := securedCheck(r)
if err != nil {
http.Error(rw, err.Error(), http.StatusForbidden)
return
}
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
http.Error(rw, "Only admins are allowed to update the notice.txt file", http.StatusForbidden)
return
}
// Get Value
newContent := r.FormValue("new-content")
// Check file
noticeExists := util.CheckFileExists("./var/notice.txt")
if !noticeExists {
ntxt, err := os.Create("./var/notice.txt")
if err != nil {
log.Errorf("Creating ./var/notice.txt failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
}
ntxt.Close()
}
if newContent != "" {
if err := os.WriteFile("./var/notice.txt", []byte(newContent), 0o666); err != nil {
log.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
} else {
rw.Write([]byte("Update Notice Content Success"))
}
} else {
if err := os.WriteFile("./var/notice.txt", []byte(""), 0o666); err != nil {
log.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
} else {
rw.Write([]byte("Empty Notice Content Success"))
}
}
}
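
Per the @accept mpfd annotation, editNotice expects multipart form data. A hypothetical client call; the /config route prefix, server URL, and bearer-token auth scheme are assumptions, not confirmed by this diff:

// Hypothetical client sketch for updating the notice box content.
var buf bytes.Buffer
mw := multipart.NewWriter(&buf)
mw.WriteField("new-content", "Maintenance window: Friday 08:00-10:00")
mw.Close()
req, _ := http.NewRequest(http.MethodPost, server+"/config/notice/", &buf)
req.Header.Set("Content-Type", mw.FormDataContentType())
req.Header.Set("Authorization", "Bearer "+jwt) // assumed JWT auth scheme
resp, err := http.DefaultClient.Do(req)       // inspect resp.StatusCode / err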
func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
err := securedCheck(r)
if err != nil {

View File

@@ -188,6 +188,10 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
if auth.SessionMaxAge != 0 {
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
}
if config.Keys.HttpsCertFile == "" && config.Keys.HttpsKeyFile == "" {
session.Options.Secure = false
}
session.Options.SameSite = http.SameSiteStrictMode
session.Values["username"] = user.Username
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
@@ -201,7 +205,6 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
}
func (auth *Authentication) Login(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
@@ -238,7 +241,13 @@ func (auth *Authentication) Login(
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
if r.FormValue("redirect") != "" {
http.RedirectHandler(r.FormValue("redirect"), http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
return
}
http.RedirectHandler("/", http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
return
}

View File

@@ -29,10 +29,9 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_showFootprint": true,
"job_list_usePaging": true,
"job_list_usePaging": false,
"plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3,

View File

@@ -31,15 +31,12 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(ctx, &obj.ID)
return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
}
// ConcurrentJobs is the resolver for the concurrentJobs field.
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
if obj.State == schema.JobStateRunning {
obj.Duration = int32(time.Now().Unix() - obj.StartTimeUnix)
}
// FIXME: Make the hardcoded duration configurable
if obj.Exclusive != 1 && obj.Duration > 600 {
return r.Repo.FindConcurrentJobs(ctx, obj)
}
@@ -159,7 +156,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
return nil, err
}
if tags, err = r.Repo.AddTag(ctx, jid, tid); err != nil {
if tags, err = r.Repo.AddTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
log.Warn("Error while adding tag")
return nil, err
}
@@ -185,7 +182,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
return nil, err
}
if tags, err = r.Repo.RemoveTag(ctx, jid, tid); err != nil {
if tags, err = r.Repo.RemoveTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
log.Warn("Error while removing tag")
return nil, err
}
@@ -211,7 +208,7 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error)
// Tags is the resolver for the tags field.
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
return r.Repo.GetTags(ctx, nil)
return r.Repo.GetTags(repository.GetUserFromContext(ctx), nil)
}
// GlobalMetrics is the resolver for the globalMetrics field.
@@ -496,9 +493,11 @@ func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
// SubCluster returns generated.SubClusterResolver implementation.
func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }
type clusterResolver struct{ *Resolver }
type jobResolver struct{ *Resolver }
type metricValueResolver struct{ *Resolver }
type mutationResolver struct{ *Resolver }
type queryResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver }
type (
clusterResolver struct{ *Resolver }
jobResolver struct{ *Resolver }
metricValueResolver struct{ *Resolver }
mutationResolver struct{ *Resolver }
queryResolver struct{ *Resolver }
subClusterResolver struct{ *Resolver }
)

View File

@@ -8,6 +8,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"math"
"os"
"strings"
@@ -84,7 +85,8 @@ func HandleImportFlag(flag string) error {
}
name := fmt.Sprintf("%s_%s", fp, statType)
job.Footprint[fp] = repository.LoadJobStat(&job, name, statType)
job.Footprint[name] = repository.LoadJobStat(&job, fp, statType)
}
job.RawFootprint, err = json.Marshal(job.Footprint)
@@ -92,6 +94,34 @@ func HandleImportFlag(flag string) error {
log.Warn("Error while marshaling job footprint")
return err
}
job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
for _, fp := range sc.EnergyFootprint {
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
}
job.EnergyFootprint[fp] = energy
totalEnergy += energy
}
job.Energy = (math.Round(totalEnergy*100) / 100)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
return err
}
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Warn("Error while marshaling job resources")

View File

@@ -82,7 +82,7 @@ func setup(t *testing.T) *repository.JobRepository {
if err := os.Mkdir(jobarchive, 0777); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
t.Fatal(err)
}
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")

View File

@@ -7,6 +7,7 @@ package importer
import (
"encoding/json"
"fmt"
"math"
"strings"
"time"
@@ -70,6 +71,7 @@ func InitDB() error {
log.Errorf("cannot get subcluster: %s", err.Error())
return err
}
job.Footprint = make(map[string]float64)
for _, fp := range sc.Footprint {
@@ -81,7 +83,7 @@ func InitDB() error {
name := fmt.Sprintf("%s_%s", fp, statType)
job.Footprint[fp] = repository.LoadJobStat(jobMeta, name, statType)
job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
}
job.RawFootprint, err = json.Marshal(job.Footprint)
@@ -90,6 +92,33 @@ func InitDB() error {
return err
}
job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
for _, fp := range sc.EnergyFootprint {
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
job.EnergyFootprint[fp] = energy
totalEnergy += energy
}
job.Energy = (math.Round(totalEnergy*100) / 100)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return err
}
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Errorf("repository initDB(): %v", err)

View File

@@ -51,7 +51,7 @@ func GetJobRepository() *JobRepository {
}
var jobColumns []string = []string{
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id",
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id",
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
"job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
}
@@ -79,12 +79,9 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
}
job.RawFootprint = nil
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
// return nil, err
// }
job.StartTime = time.Unix(job.StartTimeUnix, 0)
if job.Duration == 0 && job.State == schema.JobStateRunning {
// Always ensure accurate duration for running jobs
if job.State == schema.JobStateRunning {
job.Duration = int32(time.Since(job.StartTime).Seconds())
}
@@ -308,17 +305,17 @@ func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm
return searchterm, "", "", ""
} else { // Has to have letters and logged-in user for other guesses
if user != nil {
// Find username in jobs (match)
uresult, _ := r.FindColumnValue(user, searchterm, "job", "user", "user", false)
// Find username by username in job table (match)
uresult, _ := r.FindColumnValue(user, searchterm, "job", "hpc_user", "hpc_user", false)
if uresult != "" {
return "", uresult, "", ""
}
// Find username by name (like)
nresult, _ := r.FindColumnValue(user, searchterm, "user", "username", "name", true)
// Find username by real name in hpc_user table (like)
nresult, _ := r.FindColumnValue(user, searchterm, "hpc_user", "username", "name", true)
if nresult != "" {
return "", nresult, "", ""
}
// Find projectId in jobs (match)
// Find projectId by projectId in job table (match)
presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false)
if presult != "" {
return "", "", presult, ""
@@ -400,7 +397,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
return nil, 0, 1000
}
@@ -457,6 +454,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
return subclusters, nil
}
// FIXME: Set duration to requested walltime?
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now()
res, err := sq.Update("job").
@@ -604,13 +602,12 @@ func (r *JobRepository) UpdateEnergy(
for _, fp := range sc.EnergyFootprint {
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" {
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
// Power: Use directly as sum (Or as: [Energy (in Ws) / Time (in s)]
} else if sc.MetricConfig[i].Energy == "power" {
// This assumes the metric is of aggregation type sum
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)

View File

@@ -14,11 +14,11 @@ import (
)
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, resources, meta_data
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :resources, :meta_data
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);`
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {

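The renamed bind parameters (:hpc_user, :cluster_partition, :energy, :energy_footprint) must line up with the db struct tags on the job schema for named execution to resolve them. A hedged sketch of how InsertJob presumably drives the statement (sqlx named-query API; the repository wiring is assumed):

// Sketch, assuming sqlx: each :name in NamedJobInsert is bound from the
// matching `db:"name"` struct tag (e.g. the user field tagged db:"hpc_user").
res, err := r.DB.NamedExec(NamedJobInsert, job)
if err != nil {
    return 0, err
}
return res.LastInsertId() // database id of the newly inserted job
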
View File

@@ -37,8 +37,9 @@ func (r *JobRepository) Find(
q = q.Where("job.start_time = ?", *startTime)
}
log.Debugf("Timer Find %s", time.Since(start))
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
log.Debugf("Timer Find %s", time.Since(start))
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
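
Ordering by job.id DESC makes Find deterministic when several rows share the same (job_id, cluster, start_time) tuple: the newest database entry wins. The effective query shape, sketched from the squirrel builder fragments above:

// Sketch of the assembled query; scanJob consumes the first (newest) row.
q := sq.Select(jobColumns...).From("job").
    Where("job.job_id = ?", *jobId).
    Where("job.cluster = ?", *cluster).
    Where("job.start_time = ?", *startTime).
    OrderBy("job.id DESC")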
@@ -98,6 +99,23 @@ func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job,
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByIdWithUser executes a SQL query to find a specific batch job.
// The job is queried using the database id. The user is passed directly,
// instead of as part of the context.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
q, qerr := SecurityCheckWithUser(user, q)
if qerr != nil {
return nil, qerr
}
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByIdDirect executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
@@ -135,7 +153,7 @@ func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cl
q := sq.Select("id").
From("job").
Where("job.job_id = ?", jobId).
Where("job.user = ?", user).
Where("job.hpc_user = ?", user).
Where("job.cluster = ?", cluster).
Where("job.start_time = ?", startTime)

View File

@@ -107,8 +107,7 @@ func (r *JobRepository) CountJobs(
return count, nil
}
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
user := GetUserFromContext(ctx)
func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
if user == nil {
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user context is nil")
@@ -121,19 +120,25 @@ func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilde
return query, nil
case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
if len(user.Projects) != 0 {
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.user": user.Username}}), nil
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
} else {
log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
return query.Where("job.user = ?", user.Username), nil
return query.Where("job.hpc_user = ?", user.Username), nil
}
case user.HasRole(schema.RoleUser): // User : Only personal jobs
return query.Where("job.user = ?", user.Username), nil
return query.Where("job.hpc_user = ?", user.Username), nil
default: // No known Role, return error
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user has no or unknown roles")
}
}
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
user := GetUserFromContext(ctx)
return SecurityCheckWithUser(user, query)
}
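
Factoring the user extraction out of SecurityCheck lets callers that already hold a *schema.User, such as the REST layer after GetUserFromContext, apply the same role-based filtering without another context round-trip. A hypothetical call site:

// Hypothetical caller: restrict a job query for an explicitly passed user.
q := sq.Select(jobColumns...).From("job")
q, qerr := SecurityCheckWithUser(user, q)
if qerr != nil {
    return nil, qerr // nil user or a user without any known role
}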
// Build a sq.SelectBuilder out of a schema.JobFilter.
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
if filter.Tags != nil {
@@ -147,7 +152,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
}
if filter.User != nil {
query = buildStringCondition("job.user", filter.User, query)
query = buildStringCondition("job.hpc_user", filter.User, query)
}
if filter.Project != nil {
query = buildStringCondition("job.project", filter.Project, query)
@@ -159,14 +164,13 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
query = buildStringCondition("job.cluster", filter.Cluster, query)
}
if filter.Partition != nil {
query = buildStringCondition("job.partition", filter.Partition, query)
query = buildStringCondition("job.cluster_partition", filter.Partition, query)
}
if filter.StartTime != nil {
query = buildTimeCondition("job.start_time", filter.StartTime, query)
}
if filter.Duration != nil {
now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs.
query = query.Where("(CASE WHEN job.job_state = 'running' THEN (? - job.start_time) ELSE job.duration END) BETWEEN ? AND ?", now, filter.Duration.From, filter.Duration.To)
query = buildIntCondition("job.duration", filter.Duration, query)
}
if filter.MinRunningFor != nil {
now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs.

View File

@@ -59,7 +59,7 @@ func TestGetTags(t *testing.T) {
ctx := context.WithValue(getContext(t), contextUserKey, contextUserValue)
// Test Tag has Scope "global"
tags, counts, err := r.CountTags(ctx)
tags, counts, err := r.CountTags(GetUserFromContext(ctx))
if err != nil {
t.Fatal(err)
}

View File

@@ -114,6 +114,14 @@ func MigrateDB(backend string, db string) error {
return err
}
v, dirty, err := m.Version()
log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
if dirty {
return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
}
if err := m.Up(); err != nil {
if err == migrate.ErrNoChange {
log.Info("DB already up to date!")

View File

@@ -0,0 +1,83 @@
ALTER TABLE job DROP energy;
ALTER TABLE job DROP energy_footprint;
ALTER TABLE job ADD COLUMN flops_any_avg;
ALTER TABLE job ADD COLUMN mem_bw_avg;
ALTER TABLE job ADD COLUMN mem_used_max;
ALTER TABLE job ADD COLUMN load_avg;
ALTER TABLE job ADD COLUMN net_bw_avg;
ALTER TABLE job ADD COLUMN net_data_vol_total;
ALTER TABLE job ADD COLUMN file_bw_avg;
ALTER TABLE job ADD COLUMN file_data_vol_total;
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
ALTER TABLE job DROP footprint;
-- Restore the previous reserved-keyword names
RENAME TABLE hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`;
DROP INDEX IF EXISTS jobs_cluster;
DROP INDEX IF EXISTS jobs_cluster_user;
DROP INDEX IF EXISTS jobs_cluster_project;
DROP INDEX IF EXISTS jobs_cluster_subcluster;
DROP INDEX IF EXISTS jobs_cluster_starttime;
DROP INDEX IF EXISTS jobs_cluster_duration;
DROP INDEX IF EXISTS jobs_cluster_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_jobstate;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_user;
DROP INDEX IF EXISTS jobs_user_starttime;
DROP INDEX IF EXISTS jobs_user_duration;
DROP INDEX IF EXISTS jobs_user_numnodes;
DROP INDEX IF EXISTS jobs_project;
DROP INDEX IF EXISTS jobs_project_user;
DROP INDEX IF EXISTS jobs_project_starttime;
DROP INDEX IF EXISTS jobs_project_duration;
DROP INDEX IF EXISTS jobs_project_numnodes;
DROP INDEX IF EXISTS jobs_jobstate;
DROP INDEX IF EXISTS jobs_jobstate_user;
DROP INDEX IF EXISTS jobs_jobstate_project;
DROP INDEX IF EXISTS jobs_jobstate_starttime;
DROP INDEX IF EXISTS jobs_jobstate_duration;
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_starttime;
DROP INDEX IF EXISTS jobs_duration;
DROP INDEX IF EXISTS jobs_numnodes;
DROP INDEX IF EXISTS jobs_duration_starttime;
DROP INDEX IF EXISTS jobs_numnodes_starttime;
DROP INDEX IF EXISTS jobs_numacc_starttime;
DROP INDEX IF EXISTS jobs_energy_starttime;

View File

@@ -0,0 +1,123 @@
DROP INDEX IF EXISTS job_stats ON job;
DROP INDEX IF EXISTS job_by_user ON job;
DROP INDEX IF EXISTS job_by_starttime ON job;
DROP INDEX IF EXISTS job_by_job_id ON job;
DROP INDEX IF EXISTS job_list ON job;
DROP INDEX IF EXISTS job_list_user ON job;
DROP INDEX IF EXISTS job_list_users ON job;
DROP INDEX IF EXISTS job_list_users_start ON job;
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint JSON;
ALTER TABLE job ADD COLUMN footprint JSON;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
-- Do not use reserved keywords anymore
RENAME TABLE `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition;
ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50);
ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN project VARCHAR(50);
ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50);
ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25);
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-- Optimize DB index usage
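The composite indices above are laid out so that each common filter combination plus its default sort can be answered from a single index scan (SQLite can walk an index backwards, so the DESC sort is covered too). A minimal sketch of a matching query, assuming the squirrel (sq) builder used throughout this commit; the cluster and state values are placeholders:

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	// Filter on (cluster, job_state), sort by start_time: served end-to-end
	// by the composite index jobs_cluster_jobstate_starttime.
	query, args, err := sq.Select("id", "job_id", "hpc_user").
		From("job").
		Where("job.cluster = ?", "testcluster").
		Where("job.job_state = ?", "completed").
		OrderBy("job.start_time DESC").
		Limit(50).
		ToSql()
	if err != nil {
		panic(err)
	}
	fmt.Println(query, args)
}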

View File

@@ -19,3 +19,85 @@ UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
ALTER TABLE job DROP footprint;
DROP INDEX IF EXISTS jobs_cluster;
DROP INDEX IF EXISTS jobs_cluster_user;
DROP INDEX IF EXISTS jobs_cluster_project;
DROP INDEX IF EXISTS jobs_cluster_subcluster;
DROP INDEX IF EXISTS jobs_cluster_starttime;
DROP INDEX IF EXISTS jobs_cluster_duration;
DROP INDEX IF EXISTS jobs_cluster_numnodes;
DROP INDEX IF EXISTS jobs_cluster_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_numacc;
DROP INDEX IF EXISTS jobs_cluster_energy;
DROP INDEX IF EXISTS jobs_cluster_partition;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_energy;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_energy;
DROP INDEX IF EXISTS jobs_cluster_jobstate;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_jobstate_energy;
DROP INDEX IF EXISTS jobs_user;
DROP INDEX IF EXISTS jobs_user_starttime;
DROP INDEX IF EXISTS jobs_user_duration;
DROP INDEX IF EXISTS jobs_user_numnodes;
DROP INDEX IF EXISTS jobs_user_numhwthreads;
DROP INDEX IF EXISTS jobs_user_numacc;
DROP INDEX IF EXISTS jobs_user_energy;
DROP INDEX IF EXISTS jobs_project;
DROP INDEX IF EXISTS jobs_project_user;
DROP INDEX IF EXISTS jobs_project_starttime;
DROP INDEX IF EXISTS jobs_project_duration;
DROP INDEX IF EXISTS jobs_project_numnodes;
DROP INDEX IF EXISTS jobs_project_numhwthreads;
DROP INDEX IF EXISTS jobs_project_numacc;
DROP INDEX IF EXISTS jobs_project_energy;
DROP INDEX IF EXISTS jobs_jobstate;
DROP INDEX IF EXISTS jobs_jobstate_user;
DROP INDEX IF EXISTS jobs_jobstate_project;
DROP INDEX IF EXISTS jobs_jobstate_starttime;
DROP INDEX IF EXISTS jobs_jobstate_duration;
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_jobstate_numacc;
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_starttime;
DROP INDEX IF EXISTS jobs_duration;
DROP INDEX IF EXISTS jobs_numnodes;
DROP INDEX IF EXISTS jobs_numhwthreads;
DROP INDEX IF EXISTS jobs_numacc;
DROP INDEX IF EXISTS jobs_energy;
DROP INDEX IF EXISTS jobs_duration_starttime;
DROP INDEX IF EXISTS jobs_numnodes_starttime;
DROP INDEX IF EXISTS jobs_numhwthreads_starttime;
DROP INDEX IF EXISTS jobs_numacc_starttime;
DROP INDEX IF EXISTS jobs_energy_starttime;

View File

@@ -1,11 +1,11 @@
DROP INDEX job_stats;
DROP INDEX job_by_user;
DROP INDEX job_by_starttime;
DROP INDEX job_by_job_id;
DROP INDEX job_list;
DROP INDEX job_list_user;
DROP INDEX job_list_users;
DROP INDEX job_list_users_start;
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_list;
DROP INDEX IF EXISTS job_list_user;
DROP INDEX IF EXISTS job_list_users;
DROP INDEX IF EXISTS job_list_users_start;
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
@@ -13,6 +13,11 @@ ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
-- Do not use reserved keywords anymore
ALTER TABLE "user" RENAME TO hpc_user;
ALTER TABLE job RENAME COLUMN "user" TO hpc_user;
ALTER TABLE job RENAME COLUMN "partition" TO cluster_partition;
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
@@ -35,7 +40,7 @@ ALTER TABLE job DROP file_data_vol_total;
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, user);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
@@ -47,30 +52,30 @@ CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, partition);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, partition, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, partition, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, partition, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, partition, energy);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, partition, job_state, user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, partition, job_state, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, partition, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, partition, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, partition, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, partition, job_state, energy);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
@@ -81,18 +86,18 @@ CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_sta
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (user);
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (user, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (user, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (user, num_acc);
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (user, energy);
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);
-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, user);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
@@ -103,7 +108,7 @@ CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
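The renames above ("user" to hpc_user, "partition" to cluster_partition) exist because both names collide with SQL keywords in the supported databases, forcing identifier quoting in every handwritten query. A minimal sketch of the effect, again assuming the squirrel builder; the username is a placeholder:

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	// Before the rename this needed FROM "user" to dodge the keyword;
	// hpc_user needs no quoting at all.
	query, _, _ := sq.Select("username", "name").
		From("hpc_user").
		Where("hpc_user.username = ?", "jdoe").
		ToSql()
	fmt.Println(query)
}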

View File

@@ -111,7 +111,7 @@ func BenchmarkDB_QueryJobs(b *testing.B) {
user := "mppi133h"
filter.User = &model.StringInput{Eq: &user}
page := &model.PageRequest{ItemsPerPage: 50, Page: 1}
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc}
order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
b.Run("QueryJobs", func(b *testing.B) {
db := setup(b)

View File

@@ -22,7 +22,7 @@ import (
// GraphQL validation should make sure that no unknown values can be specified.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.user",
model.AggregateUser: "job.hpc_user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
}
@@ -86,7 +86,7 @@ func (r *JobRepository) buildStatsQuery(
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
).From("job").Join("user ON user.username = job.user").GroupBy(col)
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
} else {
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select("COUNT(job.id)",
@@ -109,7 +109,7 @@ func (r *JobRepository) buildStatsQuery(
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
// user := GetUserFromContext(ctx)
// name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
// name, _ := r.FindColumnValue(user, id, "hpc_user", "name", "username", false)
// if name != "" {
// return name
// } else {
@@ -210,7 +210,7 @@ func (r *JobRepository) JobsStatsGrouped(
totalAccHours = int(accHours.Int64)
}
if col == "job.user" {
if col == "job.hpc_user" {
// name := r.getUserName(ctx, id.String)
stats = append(stats,
&model.JobsStatistics{
@@ -560,9 +560,9 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
) (*model.MetricHistoPoints, error) {
// Get specific Peak or largest Peak
var metricConfig *schema.MetricConfig
var peak float64 = 0.0
var unit string = ""
var footprintStat string = ""
var peak float64
var unit string
var footprintStat string
for _, f := range filters {
if f.Cluster != nil {
@@ -712,8 +712,8 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
for idx, metric := range metrics {
// Get specific Peak or largest Peak
var metricConfig *schema.MetricConfig
var peak float64 = 0.0
var unit string = ""
var peak float64
var unit string
for _, f := range filters {
if f.Cluster != nil {

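One subtle fix above: buildStatsQuery now uses LeftJoin instead of Join. With an inner join, jobs whose hpc_user row is missing (users known only from the job table or the archive) would silently vanish from the grouped statistics; the left join keeps them and just leaves the user metadata NULL. The two shapes side by side, as a sketch with the aggregate columns trimmed:

// Inner join: a job without a matching hpc_user row is dropped.
inner := sq.Select("COUNT(job.id)", "job.hpc_user").From("job").
	Join("hpc_user ON hpc_user.username = job.hpc_user").
	GroupBy("job.hpc_user")

// Left join: every job is counted.
left := sq.Select("COUNT(job.id)", "job.hpc_user").From("job").
	LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").
	GroupBy("job.hpc_user")
_, _ = inner, left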
View File

@@ -5,7 +5,6 @@
package repository
import (
"context"
"fmt"
"strings"
@@ -16,9 +15,8 @@ import (
)
// Add the tag with id `tagId` to the job with the database id `jobId`.
func (r *JobRepository) AddTag(ctx context.Context, job int64, tag int64) ([]*schema.Tag, error) {
j, err := r.FindById(ctx, job)
func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIdWithUser(user, job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
@@ -32,7 +30,7 @@ func (r *JobRepository) AddTag(ctx context.Context, job int64, tag int64) ([]*sc
return nil, err
}
tags, err := r.GetTags(ctx, &job)
tags, err := r.GetTags(user, &job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
@@ -48,9 +46,8 @@ func (r *JobRepository) AddTag(ctx context.Context, job int64, tag int64) ([]*sc
}
// Removes a tag from a job
func (r *JobRepository) RemoveTag(ctx context.Context, job, tag int64) ([]*schema.Tag, error) {
j, err := r.FindById(ctx, job)
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIdWithUser(user, job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
@@ -64,7 +61,7 @@ func (r *JobRepository) RemoveTag(ctx context.Context, job, tag int64) ([]*schem
return nil, err
}
tags, err := r.GetTags(ctx, &job)
tags, err := r.GetTags(user, &job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
@@ -81,7 +78,6 @@ func (r *JobRepository) RemoveTag(ctx context.Context, job, tag int64) ([]*schem
// CreateTag creates a new tag with the specified type and name and returns its database id.
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
@@ -99,7 +95,7 @@ func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope strin
return res.LastInsertId()
}
func (r *JobRepository) CountTags(ctx context.Context) (tags []schema.Tag, counts map[string]int, err error) {
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
// Fetch all Tags in DB for Display in Frontend Tag-View
tags = make([]schema.Tag, 0, 100)
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
@@ -114,7 +110,7 @@ func (r *JobRepository) CountTags(ctx context.Context) (tags []schema.Tag, count
}
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
readable, err := r.checkScopeAuth(ctx, "read", t.Scope)
readable, err := r.checkScopeAuth(user, "read", t.Scope)
if err != nil {
return nil, nil, err
}
@@ -123,8 +119,6 @@ func (r *JobRepository) CountTags(ctx context.Context) (tags []schema.Tag, count
}
}
user := GetUserFromContext(ctx)
// Query and Count Jobs with attached Tags
q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
From("tag t").
@@ -147,9 +141,9 @@ func (r *JobRepository) CountTags(ctx context.Context) (tags []schema.Tag, count
// Unchanged: Needs to be own case still, due to UserRole/NoRole compatibility handling in else case
} else if user != nil && user.HasRole(schema.RoleManager) { // MANAGER: Count own jobs plus project's jobs
// Build ("project1", "project2", ...) list of variable length directly in SQL string
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
} else if user != nil { // USER OR NO ROLE (Compatibility): Only count own jobs
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ?)", user.Username)
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ?)", user.Username)
}
rows, err := q.RunWith(r.stmtCache).Query()
@@ -175,14 +169,13 @@ func (r *JobRepository) CountTags(ctx context.Context) (tags []schema.Tag, count
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
// If such a tag does not yet exist, it is created.
func (r *JobRepository) AddTagOrCreate(ctx context.Context, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
}
writable, err := r.checkScopeAuth(ctx, "write", tagScope)
writable, err := r.checkScopeAuth(user, "write", tagScope)
if err != nil {
return 0, err
}
@@ -198,7 +191,7 @@ func (r *JobRepository) AddTagOrCreate(ctx context.Context, jobId int64, tagType
}
}
if _, err := r.AddTag(ctx, jobId, tagId); err != nil {
if _, err := r.AddTag(user, jobId, tagId); err != nil {
return 0, err
}
@@ -217,7 +210,7 @@ func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (
}
// GetTags returns a list of all scoped tags if job is nil or of the tags that the job with that database ID has.
func (r *JobRepository) GetTags(ctx context.Context, job *int64) ([]*schema.Tag, error) {
func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, error) {
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
if job != nil {
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
@@ -238,7 +231,7 @@ func (r *JobRepository) GetTags(ctx context.Context, job *int64) ([]*schema.Tag,
return nil, err
}
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
readable, err := r.checkScopeAuth(ctx, "read", tag.Scope)
readable, err := r.checkScopeAuth(user, "read", tag.Scope)
if err != nil {
return nil, err
}
@@ -299,8 +292,7 @@ func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, t
return nil
}
func (r *JobRepository) checkScopeAuth(ctx context.Context, operation string, scope string) (pass bool, err error) {
user := GetUserFromContext(ctx)
func (r *JobRepository) checkScopeAuth(user *schema.User, operation string, scope string) (pass bool, err error) {
if user != nil {
switch {
case operation == "write" && scope == "admin":

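With the context parameter replaced by an explicit *schema.User, callers resolve the user once and pass it down; checkScopeAuth then authorizes against that value. A hypothetical call-site sketch (repository handle, job ID, and tag values are placeholders; imports from this repo assumed):

func tagJob(r *http.Request, jobRepo *repository.JobRepository, jobID int64) {
	user := repository.GetUserFromContext(r.Context())
	// AddTagOrCreate runs the scope check against this user before tagging.
	if _, err := jobRepo.AddTagOrCreate(user, jobID, "issue", "load-imbalance", "global"); err != nil {
		log.Warnf("AddTagOrCreate failed: %s", err.Error())
	}
}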
Binary file not shown.

Binary file not shown.

View File

@@ -46,8 +46,8 @@ func GetUserRepository() *UserRepository {
func (r *UserRepository) GetUser(username string) (*schema.User, error) {
user := &schema.User{Username: username}
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
Where("user.username = ?", username).RunWith(r.DB).
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("hpc_user").
Where("hpc_user.username = ?", username).RunWith(r.DB).
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
log.Warnf("Error while querying user '%v' from database", username)
return nil, err
@@ -73,7 +73,7 @@ func (r *UserRepository) GetUser(username string) (*schema.User, error) {
func (r *UserRepository) GetLdapUsernames() ([]string, error) {
var users []string
rows, err := r.DB.Query(`SELECT username FROM user WHERE user.ldap = 1`)
rows, err := r.DB.Query(`SELECT username FROM hpc_user WHERE hpc_user.ldap = 1`)
if err != nil {
log.Warn("Error while querying usernames")
return nil, err
@@ -121,7 +121,7 @@ func (r *UserRepository) AddUser(user *schema.User) error {
vals = append(vals, int(user.AuthSource))
}
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(r.DB).Exec(); err != nil {
if _, err := sq.Insert("hpc_user").Columns(cols...).Values(vals...).RunWith(r.DB).Exec(); err != nil {
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
return err
}
@@ -134,7 +134,7 @@ func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) erro
// user contains updated info, apply to dbuser
// TODO: Discuss updatable fields
if dbUser.Name != user.Name {
if _, err := sq.Update("user").Set("name", user.Name).Where("user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
if _, err := sq.Update("hpc_user").Set("name", user.Name).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
log.Errorf("error while updating name of user '%s'", user.Username)
return err
}
@@ -143,7 +143,7 @@ func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) erro
// Toggled until greenlit
// if dbUser.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
// projects, _ := json.Marshal(user.Projects)
// if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
// if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
// return err
// }
// }
@@ -152,7 +152,7 @@ func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) erro
}
func (r *UserRepository) DelUser(username string) error {
_, err := r.DB.Exec(`DELETE FROM user WHERE user.username = ?`, username)
_, err := r.DB.Exec(`DELETE FROM hpc_user WHERE hpc_user.username = ?`, username)
if err != nil {
log.Errorf("Error while deleting user '%s' from DB", username)
return err
@@ -162,7 +162,7 @@ func (r *UserRepository) DelUser(username string) error {
}
func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
q := sq.Select("username", "name", "email", "roles", "projects").From("user")
q := sq.Select("username", "name", "email", "roles", "projects").From("hpc_user")
if specialsOnly {
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
}
@@ -223,7 +223,7 @@ func (r *UserRepository) AddRole(
}
roles, _ := json.Marshal(append(user.Roles, newRole))
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
if _, err := sq.Update("hpc_user").Set("roles", roles).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
log.Errorf("error while adding new role for user '%s'", user.Username)
return err
}
@@ -259,7 +259,7 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
}
mroles, _ := json.Marshal(newroles)
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
if _, err := sq.Update("hpc_user").Set("roles", mroles).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
log.Errorf("Error while removing role for user '%s'", user.Username)
return err
}
@@ -285,7 +285,7 @@ func (r *UserRepository) AddProject(
}
projects, _ := json.Marshal(append(user.Projects, project))
if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
return err
}
@@ -323,7 +323,7 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
} else {
result, _ = json.Marshal(newprojects)
}
if _, err := sq.Update("user").Set("projects", result).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
if _, err := sq.Update("hpc_user").Set("projects", result).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
return err
}
return nil
@@ -339,9 +339,10 @@ const ContextUserKey ContextKey = "user"
func GetUserFromContext(ctx context.Context) *schema.User {
x := ctx.Value(ContextUserKey)
if x == nil {
log.Warnf("no user retrieved from context")
return nil
}
// log.Infof("user retrieved from context: %v", x.(*schema.User))
return x.(*schema.User)
}
@@ -354,7 +355,7 @@ func (r *UserRepository) FetchUserInCtx(ctx context.Context, username string) (*
user := &model.User{Username: username}
var name, email sql.NullString
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
if err := sq.Select("name", "email").From("hpc_user").Where("hpc_user.username = ?", username).
RunWith(r.DB).QueryRow().Scan(&name, &email); err != nil {
if err == sql.ErrNoRows {
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */

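GetUserFromContext now logs a warning when no user is attached to the context, but callers still receive nil and should guard for it before passing the value on. A defensive sketch (handler wiring is hypothetical):

user := repository.GetUserFromContext(r.Context())
if user == nil {
	http.Error(rw, "no user in request context", http.StatusUnauthorized)
	return
}
tags, counts, err := jobRepo.CountTags(user)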
View File

@@ -35,7 +35,7 @@ type Route struct {
var routes []Route = []Route{
{"/", "home.tmpl", "ClusterCockpit", false, setupHomeRoute},
{"/config", "config.tmpl", "Settings", false, func(i InfoType, r *http.Request) InfoType { return i }},
{"/config", "config.tmpl", "Settings", false, setupConfigRoute},
{"/monitoring/jobs/", "monitoring/jobs.tmpl", "Jobs - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { return i }},
{"/monitoring/job/{id:[0-9]+}", "monitoring/job.tmpl", "Job <ID> - ClusterCockpit", false, setupJobRoute},
{"/monitoring/users/", "monitoring/list.tmpl", "Users - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { i["listType"] = "USER"; return i }},
@@ -53,15 +53,19 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
jobRepo := repository.GetJobRepository()
groupBy := model.AggregateCluster
// startJobCount := time.Now()
stats, err := jobRepo.JobCountGrouped(r.Context(), nil, &groupBy)
if err != nil {
log.Warnf("failed to count jobs: %s", err.Error())
}
// log.Infof("Timer HOME ROUTE startJobCount: %s", time.Since(startJobCount))
// startRunningJobCount := time.Now()
stats, err = jobRepo.AddJobCountGrouped(r.Context(), nil, &groupBy, stats, "running")
if err != nil {
log.Warnf("failed to count running jobs: %s", err.Error())
}
// log.Infof("Timer HOME ROUTE startRunningJobCount: %s", time.Since(startRunningJobCount))
i["clusters"] = stats
@@ -77,6 +81,17 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
return i
}
func setupConfigRoute(i InfoType, r *http.Request) InfoType {
if util.CheckFileExists("./var/notice.txt") {
msg, err := os.ReadFile("./var/notice.txt")
if err == nil {
i["ncontent"] = string(msg)
}
}
return i
}
func setupJobRoute(i InfoType, r *http.Request) InfoType {
i["id"] = mux.Vars(r)["id"]
if config.Keys.EmissionConstant != 0 {
@@ -157,7 +172,7 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType {
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
jobRepo := repository.GetJobRepository()
tags, counts, err := jobRepo.CountTags(r.Context())
tags, counts, err := jobRepo.CountTags(repository.GetUserFromContext(r.Context()))
tagMap := make(map[string][]map[string]interface{})
if err != nil {
log.Warnf("GetTags failed: %s", err.Error())
@@ -196,6 +211,7 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
return i
}
// FIXME: Lots of redundant code. Needs refactoring
func buildFilterPresets(query url.Values) map[string]interface{} {
filterPresets := map[string]interface{}{}
@@ -258,6 +274,16 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
}
}
}
if query.Get("numHWThreads") != "" {
parts := strings.Split(query.Get("numHWThreads"), "-")
if len(parts) == 2 {
a, e1 := strconv.Atoi(parts[0])
b, e2 := strconv.Atoi(parts[1])
if e1 == nil && e2 == nil {
filterPresets["numHWThreads"] = map[string]int{"from": a, "to": b}
}
}
}
if query.Get("numAccelerators") != "" {
parts := strings.Split(query.Get("numAccelerators"), "-")
if len(parts) == 2 {
@@ -299,7 +325,35 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
}
}
}
if query.Get("energy") != "" {
parts := strings.Split(query.Get("energy"), "-")
if len(parts) == 2 {
a, e1 := strconv.Atoi(parts[0])
b, e2 := strconv.Atoi(parts[1])
if e1 == nil && e2 == nil {
filterPresets["energy"] = map[string]int{"from": a, "to": b}
}
}
}
if len(query["stat"]) != 0 {
statList := make([]map[string]interface{}, 0)
for _, statEntry := range query["stat"] {
parts := strings.Split(statEntry, "-")
if len(parts) == 3 { // Metric footprint stat filter, format: <field>-<from>-<to>
a, e1 := strconv.ParseInt(parts[1], 10, 64)
b, e2 := strconv.ParseInt(parts[2], 10, 64)
if e1 == nil && e2 == nil {
statEntry := map[string]interface{}{
"field": parts[0],
"from": a,
"to": b,
}
statList = append(statList, statEntry)
}
}
}
filterPresets["stats"] = statList
}
return filterPresets
}
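The new presets parse plain from-to ranges, while each stat entry packs three dash-separated parts (field, from, to). A hypothetical query string and the presets it yields; this sketch would have to live inside the package, since buildFilterPresets is unexported:

q, _ := url.ParseQuery("numHWThreads=4-8&energy=100-2000&stat=flops_any_avg-10-500")
presets := buildFilterPresets(q)
// presets["numHWThreads"] == map[string]int{"from": 4, "to": 8}
// presets["energy"]       == map[string]int{"from": 100, "to": 2000}
// presets["stats"]        == []map[string]interface{}{{"field": "flops_any_avg", "from": int64(10), "to": int64(500)}}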
@@ -322,6 +376,7 @@ func SetupRoutes(router *mux.Router, buildInfo web.Build) {
// Get User -> What if NIL?
user := repository.GetUserFromContext(r.Context())
// Get Roles
availableRoles, _ := schema.GetValidRolesMap(user)

View File

@@ -14,7 +14,7 @@ import (
func RegisterUpdateDurationWorker() {
var frequency string
if config.Keys.CronFrequency.DurationWorker != "" {
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.DurationWorker != "" {
frequency = config.Keys.CronFrequency.DurationWorker
} else {
frequency = "5m"

View File

@@ -10,7 +10,7 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -20,7 +20,7 @@ import (
func RegisterFootprintWorker() {
var frequency string
if config.Keys.CronFrequency.FootprintWorker != "" {
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.FootprintWorker != "" {
frequency = config.Keys.CronFrequency.FootprintWorker
} else {
frequency = "10m"
@@ -37,32 +37,38 @@ func RegisterFootprintWorker() {
cl := 0
log.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
t, err := jobRepo.TransactionInit()
if err != nil {
log.Errorf("Failed TransactionInit %v", err)
}
for _, cluster := range archive.Clusters {
s_cluster := time.Now()
jobs, err := jobRepo.FindRunningJobs(cluster.Name)
if err != nil {
continue
}
// NOTE: Additional Subcluster Loop Could Allow For Limited List Of Footprint-Metrics Only.
// - Chunk-Size Would Then Be 'SubCluster' (Running Jobs, Transactions) as Lists Can Change Within SCs
// - Would Require Review of 'updateFootprint' Usage (Logic Could Possibly Be Included Here Completely)
allMetrics := make([]string, 0)
metricConfigs := archive.GetCluster(cluster.Name).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
scopes := []schema.MetricScope{schema.MetricScopeNode}
scopes = append(scopes, schema.MetricScopeCore)
scopes = append(scopes, schema.MetricScopeAccelerator)
repo, err := metricdata.GetMetricDataRepo(cluster.Name)
if err != nil {
log.Errorf("no metric data repository configured for '%s'", cluster.Name)
continue
}
pendingStatements := []sq.UpdateBuilder{}
for _, job := range jobs {
log.Debugf("Try job %d", job.JobID)
log.Debugf("Prepare job %d", job.JobID)
cl++
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, context.Background(), 0) // 0 Resolution-Value retrieves highest res
s_job := time.Now()
jobStats, err := repo.LoadStats(job, allMetrics, context.Background())
if err != nil {
log.Errorf("Error wile loading job data for footprint update: %v", err)
log.Errorf("error wile loading job data stats for footprint update: %v", err)
ce++
continue
}
@@ -73,19 +79,19 @@ func RegisterFootprintWorker() {
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// This should never happen ?
ce++
continue
}
for _, metric := range allMetrics {
avg, min, max := 0.0, 0.0, 0.0
data, ok := jobStats[metric] // JobStats[Metric1:[Hostname1:[Stats], Hostname2:[Stats], ...], Metric2[...] ...]
if ok {
for _, res := range job.Resources {
hostStats, ok := data[res.Hostname]
if ok {
avg += hostStats.Avg
min = math.Min(min, hostStats.Min)
max = math.Max(max, hostStats.Max)
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
}
// Add values rounded to 2 digits
@@ -100,44 +106,41 @@ func RegisterFootprintWorker() {
}
}
// Init UpdateBuilder
// Build Statement per Job, Add to Pending Array
stmt := sq.Update("job")
// Add SET queries
stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
if err != nil {
log.Errorf("Update job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
ce++
continue
}
stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta)
if err != nil {
log.Errorf("Update job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
ce++
continue
}
// Add WHERE Filter
stmt = stmt.Where("job.id = ?", job.ID)
query, args, err := stmt.ToSql()
if err != nil {
log.Errorf("Failed in ToSQL conversion: %v", err)
ce++
continue
}
// Args: JSON, JSON, ENERGY, JOBID
jobRepo.TransactionAdd(t, query, args...)
// if err := jobRepo.Execute(stmt); err != nil {
// log.Errorf("Update job footprint (dbid: %d) failed at db execute: %s", job.ID, err.Error())
// continue
// }
c++
log.Debugf("Finish Job %d", job.JobID)
pendingStatements = append(pendingStatements, stmt)
log.Debugf("Job %d took %s", job.JobID, time.Since(s_job))
}
jobRepo.TransactionCommit(t)
log.Debugf("Finish Cluster %s", cluster.Name)
t, err := jobRepo.TransactionInit()
if err != nil {
log.Errorf("failed TransactionInit %v", err)
log.Errorf("skipped %d transactions for cluster %s", len(pendingStatements), cluster.Name)
ce += len(pendingStatements)
} else {
for _, ps := range pendingStatements {
query, args, err := ps.ToSql()
if err != nil {
log.Errorf("failed in ToSQL conversion: %v", err)
ce++
} else {
// args...: Footprint-JSON, Energyfootprint-JSON, TotalEnergy, JobID
jobRepo.TransactionAdd(t, query, args...)
c++
}
}
jobRepo.TransactionEnd(t)
}
log.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
}
jobRepo.TransactionEnd(t)
log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
}))
}
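The reworked worker first builds one UpdateBuilder per job, then opens a transaction per cluster and flushes the collected statements in one batch, so a failed TransactionInit skips only that cluster instead of aborting the whole run. The batching pattern distilled (names as in the diff above; error handling trimmed):

pending := []sq.UpdateBuilder{}
for _, job := range jobs {
	stmt := sq.Update("job")
	stmt, _ = jobRepo.UpdateFootprint(stmt, jobMeta) // adds the footprint SET clauses
	stmt, _ = jobRepo.UpdateEnergy(stmt, jobMeta)    // adds the energy SET clauses
	pending = append(pending, stmt.Where("job.id = ?", job.ID))
}
if t, err := jobRepo.TransactionInit(); err == nil {
	for _, ps := range pending {
		if query, args, err := ps.ToSql(); err == nil {
			jobRepo.TransactionAdd(t, query, args...)
		}
	}
	jobRepo.TransactionEnd(t)
}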