diff --git a/Makefile b/Makefile index 48da4e0..52f0d39 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ TARGET = ./cc-backend VAR = ./var CFG = config.json .env FRONTEND = ./web/frontend -VERSION = 1.4.1 +VERSION = 1.4.2 GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development') CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S") LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}' diff --git a/ReleaseNotes.md b/ReleaseNotes.md index bb25b5d..2659964 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,4 +1,4 @@ -# `cc-backend` version 1.4.1 +# `cc-backend` version 1.4.2 Supports job archive version 2 and database version 8. @@ -12,7 +12,8 @@ For release specific notes visit the [ClusterCockpit Documentation](https://clus migration might require several hours! - You need to adapt the `cluster.json` configuration files in the job-archive, add new required attributes to the metric list and after that edit - `./job-archive/version.txt` to version 2. + `./job-archive/version.txt` to version 2. Only metrics that have the footprint + attribute set can be filtered and show up in the footprint UI and polar plot. - Continuous scrolling is default now in all job lists. You can change this back to paging globally, also every user can configure to use paging or continuous scrolling individually. diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 436379d..33bab07 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -112,7 +112,7 @@ func main() { if flagInit { initEnv() - fmt.Print("Succesfully setup environment!\n") + fmt.Print("Successfully setup environment!\n") fmt.Print("Please review config.json and .env and adjust it to your needs.\n") fmt.Print("Add your job-archive at ./var/job-archive.\n") os.Exit(0) diff --git a/cmd/cc-backend/server.go b/cmd/cc-backend/server.go index 083b9e5..0770e81 100644 --- a/cmd/cc-backend/server.go +++ b/cmd/cc-backend/server.go @@ -25,7 +25,6 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph" "github.com/ClusterCockpit/cc-backend/internal/graph/generated" - "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/routerConfig" "github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv" @@ -314,9 +313,6 @@ func serverShutdown() { // First shut down the server gracefully (waiting for all ongoing requests) server.Shutdown(context.Background()) - // Then, wait for any async jobStarts still pending... - repository.WaitForJobStart() - // Then, wait for any async archivings still pending... archiver.WaitForArchiving() } diff --git a/init/clustercockpit.service b/init/clustercockpit.service index 53fc429..0a9448d 100644 --- a/init/clustercockpit.service +++ b/init/clustercockpit.service @@ -1,5 +1,5 @@ [Unit] -Description=ClusterCockpit Web Server (Go edition) +Description=ClusterCockpit Web Server Documentation=https://github.com/ClusterCockpit/cc-backend Wants=network-online.target After=network-online.target diff --git a/internal/api/api_test.go b/internal/api/api_test.go index bcabd5f..c47bd4d 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -249,9 +249,6 @@ func TestRestApi(t *testing.T) { if response.StatusCode != http.StatusCreated { t.Fatal(response.Status, recorder.Body.String()) } - - time.Sleep(1 * time.Second) - resolver := graph.GetResolverInstance() job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime) if err != nil { diff --git a/internal/api/rest.go b/internal/api/rest.go index db747ce..4e52701 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -123,18 +123,8 @@ func (api *RestApi) MountFrontendApiRoutes(r *mux.Router) { } } -// StartJobApiResponse model -type StartJobApiResponse struct { - Message string `json:"msg"` -} - -// DeleteJobApiResponse model -type DeleteJobApiResponse struct { - Message string `json:"msg"` -} - -// UpdateUserApiResponse model -type UpdateUserApiResponse struct { +// DefaultApiResponse model +type DefaultJobApiResponse struct { Message string `json:"msg"` } @@ -341,7 +331,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) { withMetadata := false filter := &model.JobFilter{} page := &model.PageRequest{ItemsPerPage: 25, Page: 1} - order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc} + order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc} for key, vals := range r.URL.Query() { switch key { @@ -790,6 +780,11 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) { return } + // aquire lock to avoid race condition between API calls + var unlockOnce sync.Once + api.RepositoryMutex.Lock() + defer unlockOnce.Do(api.RepositoryMutex.Unlock) + // Check if combination of (job_id, cluster_id, start_time) already exists: jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil) if err != nil && err != sql.ErrNoRows { @@ -804,12 +799,27 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) { } } - repository.TriggerJobStart(repository.JobWithUser{Job: &req, User: repository.GetUserFromContext(r.Context())}) + id, err := api.JobRepository.Start(&req) + if err != nil { + handleError(fmt.Errorf("insert into database failed: %w", err), http.StatusInternalServerError, rw) + return + } + // unlock here, adding Tags can be async + unlockOnce.Do(api.RepositoryMutex.Unlock) + for _, tag := range req.Tags { + if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil { + http.Error(rw, err.Error(), http.StatusInternalServerError) + handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw) + return + } + } + + log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime) rw.Header().Add("Content-Type", "application/json") rw.WriteHeader(http.StatusCreated) - json.NewEncoder(rw).Encode(StartJobApiResponse{ - Message: fmt.Sprintf("Successfully triggered job start"), + json.NewEncoder(rw).Encode(DefaultJobApiResponse{ + Message: "success", }) } @@ -892,7 +902,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) { } rw.Header().Add("Content-Type", "application/json") rw.WriteHeader(http.StatusOK) - json.NewEncoder(rw).Encode(DeleteJobApiResponse{ + json.NewEncoder(rw).Encode(DefaultJobApiResponse{ Message: fmt.Sprintf("Successfully deleted job %s", id), }) } @@ -943,7 +953,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) rw.Header().Add("Content-Type", "application/json") rw.WriteHeader(http.StatusOK) - json.NewEncoder(rw).Encode(DeleteJobApiResponse{ + json.NewEncoder(rw).Encode(DefaultJobApiResponse{ Message: fmt.Sprintf("Successfully deleted job %d", job.ID), }) } @@ -987,7 +997,7 @@ func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) { rw.Header().Add("Content-Type", "application/json") rw.WriteHeader(http.StatusOK) - json.NewEncoder(rw).Encode(DeleteJobApiResponse{ + json.NewEncoder(rw).Encode(DefaultJobApiResponse{ Message: fmt.Sprintf("Successfully deleted %d jobs", cnt), }) } diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 9fd7260..b529f2c 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -36,10 +36,7 @@ func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, // ConcurrentJobs is the resolver for the concurrentJobs field. func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) { - if obj.State == schema.JobStateRunning { - obj.Duration = int32(time.Now().Unix() - obj.StartTimeUnix) - } - + // FIXME: Make the hardcoded duration configurable if obj.Exclusive != 1 && obj.Duration > 600 { return r.Repo.FindConcurrentJobs(ctx, obj) } diff --git a/internal/repository/dbConnection.go b/internal/repository/dbConnection.go index d062052..418eef9 100644 --- a/internal/repository/dbConnection.go +++ b/internal/repository/dbConnection.go @@ -82,8 +82,6 @@ func Connect(driver string, db string) { if err != nil { log.Fatal(err) } - - startJobStartWorker() }) } diff --git a/internal/repository/job.go b/internal/repository/job.go index cc44ca9..11f3b46 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -79,12 +79,9 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) { } job.RawFootprint = nil - // if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil { - // return nil, err - // } - job.StartTime = time.Unix(job.StartTimeUnix, 0) - if job.Duration == 0 && job.State == schema.JobStateRunning { + // Always ensure accurate duration for running jobs + if job.State == schema.JobStateRunning { job.Duration = int32(time.Since(job.StartTime).Seconds()) } @@ -457,6 +454,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in return subclusters, nil } +// FIXME: Set duration to requested walltime? func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error { start := time.Now() res, err := sq.Update("job"). diff --git a/internal/repository/jobQuery.go b/internal/repository/jobQuery.go index 0ab2ea2..b43b569 100644 --- a/internal/repository/jobQuery.go +++ b/internal/repository/jobQuery.go @@ -170,8 +170,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select query = buildTimeCondition("job.start_time", filter.StartTime, query) } if filter.Duration != nil { - now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs. - query = query.Where("(CASE WHEN job.job_state = 'running' THEN (? - job.start_time) ELSE job.duration END) BETWEEN ? AND ?", now, filter.Duration.From, filter.Duration.To) + query = buildIntCondition("job.duration", filter.Duration, query) } if filter.MinRunningFor != nil { now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs. diff --git a/internal/repository/jobStartWorker.go b/internal/repository/jobStartWorker.go deleted file mode 100644 index 18d2be7..0000000 --- a/internal/repository/jobStartWorker.go +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. -// All rights reserved. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. -package repository - -import ( - "sync" - "time" - - "github.com/ClusterCockpit/cc-backend/pkg/log" - "github.com/ClusterCockpit/cc-backend/pkg/schema" -) - -type JobWithUser struct { - Job *schema.JobMeta - User *schema.User -} - -var ( - jobStartPending sync.WaitGroup - jobStartChannel chan JobWithUser -) - -func startJobStartWorker() { - jobStartChannel = make(chan JobWithUser, 128) - - go jobStartWorker() -} - -// Archiving worker thread -func jobStartWorker() { - for { - select { - case req, ok := <-jobStartChannel: - if !ok { - break - } - jobRepo := GetJobRepository() - var id int64 - - for i := 0; i < 5; i++ { - var err error - - id, err = jobRepo.Start(req.Job) - if err != nil { - log.Errorf("Attempt %d: insert into database failed: %v", i, err) - } else { - break - } - time.Sleep(1 * time.Second) - } - - for _, tag := range req.Job.Tags { - if _, err := jobRepo.AddTagOrCreate(req.User, id, - tag.Type, tag.Name, tag.Scope); err != nil { - log.Errorf("adding tag to new job %d failed: %v", id, err) - } - } - - log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", - id, req.Job.Cluster, req.Job.JobID, req.Job.User, req.Job.StartTime) - - jobStartPending.Done() - } - } -} - -// Trigger async archiving -func TriggerJobStart(req JobWithUser) { - if jobStartChannel == nil { - log.Fatal("Cannot start Job without jobStart channel. Did you Start the worker?") - } - - jobStartPending.Add(1) - jobStartChannel <- req -} - -// Wait for background thread to finish pending archiving operations -func WaitForJobStart() { - // close channel and wait for worker to process remaining jobs - jobStartPending.Wait() -} diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index 6d1fbfc..1ca9ec5 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -111,7 +111,7 @@ func BenchmarkDB_QueryJobs(b *testing.B) { user := "mppi133h" filter.User = &model.StringInput{Eq: &user} page := &model.PageRequest{ItemsPerPage: 50, Page: 1} - order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc} + order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc} b.Run("QueryJobs", func(b *testing.B) { db := setup(b) diff --git a/internal/routerConfig/routes.go b/internal/routerConfig/routes.go index 2267efb..1a3317f 100644 --- a/internal/routerConfig/routes.go +++ b/internal/routerConfig/routes.go @@ -182,6 +182,7 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType { return i } +// FIXME: Lots of redundant code. Needs refactoring func buildFilterPresets(query url.Values) map[string]interface{} { filterPresets := map[string]interface{}{} diff --git a/pkg/schema/schemas/job-data.schema.json b/pkg/schema/schemas/job-data.schema.json index e8a5739..c0c492b 100644 --- a/pkg/schema/schemas/job-data.schema.json +++ b/pkg/schema/schemas/job-data.schema.json @@ -1,490 +1,490 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "embedfs://job-data.schema.json", - "title": "Job metric data list", - "description": "Collection of metric data of a HPC job", - "type": "object", - "properties": { - "mem_used": { - "description": "Memory capacity used", - "type": "object", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "flops_any": { - "description": "Total flop rate with DP flops scaled up", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "core": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "mem_bw": { - "description": "Main memory bandwidth", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "net_bw": { - "description": "Total fast interconnect network bandwidth", - "type": "object", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "ipc": { - "description": "Instructions executed per cycle", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "core": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "cpu_user": { - "description": "CPU user active core utilization", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "core": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "cpu_load": { - "description": "CPU requested core utilization (load 1m)", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "flops_dp": { - "description": "Double precision flop rate", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "core": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "flops_sp": { - "description": "Single precision flops rate", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "core": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "vectorization_ratio": { - "description": "Fraction of arithmetic instructions using SIMD instructions", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "core": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "cpu_power": { - "description": "CPU power consumption", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "mem_power": { - "description": "Memory power consumption", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "acc_utilization": { - "description": "GPU utilization", - "properties": { - "accelerator": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "accelerator" - ] - }, - "acc_mem_used": { - "description": "GPU memory capacity used", - "properties": { - "accelerator": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "accelerator" - ] - }, - "acc_power": { - "description": "GPU power consumption", - "properties": { - "accelerator": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "accelerator" - ] - }, - "clock": { - "description": "Average core frequency", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "socket": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "core": { - "$ref": "embedfs://job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "eth_read_bw": { - "description": "Ethernet read bandwidth", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "eth_write_bw": { - "description": "Ethernet write bandwidth", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "filesystems": { - "description": "Array of filesystems", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "nfs", - "lustre", - "gpfs", - "nvme", - "ssd", - "hdd", - "beegfs" - ] - }, - "read_bw": { - "description": "File system read bandwidth", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "write_bw": { - "description": "File system write bandwidth", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "read_req": { - "description": "File system read requests", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "write_req": { - "description": "File system write requests", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "inodes": { - "description": "File system write requests", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "accesses": { - "description": "File system open and close", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "fsync": { - "description": "File system fsync", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "create": { - "description": "File system create", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "open": { - "description": "File system open", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "close": { - "description": "File system close", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "seek": { - "description": "File system seek", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - } - }, - "required": [ - "name", - "type", - "read_bw", - "write_bw" - ] - }, - "minItems": 1 + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://job-data.schema.json", + "title": "Job metric data list", + "description": "Collection of metric data of a HPC job", + "type": "object", + "properties": { + "mem_used": { + "description": "Memory capacity used", + "type": "object", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" } + }, + "required": [ + "node" + ] }, - "ic_rcv_packets": { - "description": "Network interconnect read packets", + "flops_any": { + "description": "Total flop rate with DP flops scaled up", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "mem_bw": { + "description": "Main memory bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "net_bw": { + "description": "Total fast interconnect network bandwidth", + "type": "object", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "ipc": { + "description": "Instructions executed per cycle", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "cpu_user": { + "description": "CPU user active core utilization", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "cpu_load": { + "description": "CPU requested core utilization (load 1m)", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "flops_dp": { + "description": "Double precision flop rate", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "flops_sp": { + "description": "Single precision flops rate", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "vectorization_ratio": { + "description": "Fraction of arithmetic instructions using SIMD instructions", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "cpu_power": { + "description": "CPU power consumption", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "mem_power": { + "description": "Memory power consumption", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "acc_utilization": { + "description": "GPU utilization", + "properties": { + "accelerator": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "accelerator" + ] + }, + "acc_mem_used": { + "description": "GPU memory capacity used", + "properties": { + "accelerator": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "accelerator" + ] + }, + "acc_power": { + "description": "GPU power consumption", + "properties": { + "accelerator": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "accelerator" + ] + }, + "clock": { + "description": "Average core frequency", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "eth_read_bw": { + "description": "Ethernet read bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "eth_write_bw": { + "description": "Ethernet write bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "filesystems": { + "description": "Array of filesystems", + "type": "array", + "items": { + "type": "object", "properties": { - "node": { + "name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "nfs", + "lustre", + "gpfs", + "nvme", + "ssd", + "hdd", + "beegfs" + ] + }, + "read_bw": { + "description": "File system read bandwidth", + "properties": { + "node": { "$ref": "embedfs://job-metric-data.schema.json" - } + } + }, + "required": [ + "node" + ] + }, + "write_bw": { + "description": "File system write bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "read_req": { + "description": "File system read requests", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "write_req": { + "description": "File system write requests", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "inodes": { + "description": "File system write requests", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "accesses": { + "description": "File system open and close", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "fsync": { + "description": "File system fsync", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "create": { + "description": "File system create", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "open": { + "description": "File system open", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "close": { + "description": "File system close", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "seek": { + "description": "File system seek", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + } }, "required": [ - "node" - ] - }, - "ic_send_packets": { - "description": "Network interconnect send packet", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "ic_read_bw": { - "description": "Network interconnect read bandwidth", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "ic_write_bw": { - "description": "Network interconnect write bandwidth", - "properties": { - "node": { - "$ref": "embedfs://job-metric-data.schema.json" - } - }, - "required": [ - "node" + "name", + "type", + "read_bw", + "write_bw" ] + }, + "minItems": 1 + } + }, + "ic_rcv_packets": { + "description": "Network interconnect read packets", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } }, "required": [ - "cpu_user", - "cpu_load", - "mem_used", - "flops_any", - "mem_bw", - "net_bw", - "filesystems" + "node" ] + }, + "ic_send_packets": { + "description": "Network interconnect send packet", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "ic_read_bw": { + "description": "Network interconnect read bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "ic_write_bw": { + "description": "Network interconnect write bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "required": [ + "cpu_user", + "cpu_load", + "mem_used", + "flops_any", + "mem_bw", + "net_bw", + "filesystems" + ] } diff --git a/pkg/schema/schemas/job-meta.schema.json b/pkg/schema/schemas/job-meta.schema.json index b907d7f..db7475c 100644 --- a/pkg/schema/schemas/job-meta.schema.json +++ b/pkg/schema/schemas/job-meta.schema.json @@ -1,351 +1,351 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "embedfs://job-meta.schema.json", - "title": "Job meta data", - "description": "Meta data information of a HPC job", - "type": "object", - "properties": { - "jobId": { - "description": "The unique identifier of a job", - "type": "integer" - }, - "user": { - "description": "The unique identifier of a user", + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://job-meta.schema.json", + "title": "Job meta data", + "description": "Meta data information of a HPC job", + "type": "object", + "properties": { + "jobId": { + "description": "The unique identifier of a job", + "type": "integer" + }, + "user": { + "description": "The unique identifier of a user", + "type": "string" + }, + "project": { + "description": "The unique identifier of a project", + "type": "string" + }, + "cluster": { + "description": "The unique identifier of a cluster", + "type": "string" + }, + "subCluster": { + "description": "The unique identifier of a sub cluster", + "type": "string" + }, + "partition": { + "description": "The Slurm partition to which the job was submitted", + "type": "string" + }, + "arrayJobId": { + "description": "The unique identifier of an array job", + "type": "integer" + }, + "numNodes": { + "description": "Number of nodes used", + "type": "integer", + "exclusiveMinimum": 0 + }, + "numHwthreads": { + "description": "Number of HWThreads used", + "type": "integer", + "exclusiveMinimum": 0 + }, + "numAcc": { + "description": "Number of accelerators used", + "type": "integer", + "exclusiveMinimum": 0 + }, + "exclusive": { + "description": "Specifies how nodes are shared. 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of same user", + "type": "integer", + "minimum": 0, + "maximum": 2 + }, + "monitoringStatus": { + "description": "State of monitoring system during job run", + "type": "integer" + }, + "smt": { + "description": "SMT threads used by job", + "type": "integer" + }, + "walltime": { + "description": "Requested walltime of job in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "jobState": { + "description": "Final state of job", + "type": "string", + "enum": [ + "completed", + "failed", + "cancelled", + "stopped", + "out_of_memory", + "timeout" + ] + }, + "startTime": { + "description": "Start epoch time stamp in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "duration": { + "description": "Duration of job in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "resources": { + "description": "Resources used by job", + "type": "array", + "items": { + "type": "object", + "properties": { + "hostname": { "type": "string" - }, - "project": { - "description": "The unique identifier of a project", - "type": "string" - }, - "cluster": { - "description": "The unique identifier of a cluster", - "type": "string" - }, - "subCluster": { - "description": "The unique identifier of a sub cluster", - "type": "string" - }, - "partition": { - "description": "The Slurm partition to which the job was submitted", - "type": "string" - }, - "arrayJobId": { - "description": "The unique identifier of an array job", - "type": "integer" - }, - "numNodes": { - "description": "Number of nodes used", - "type": "integer", - "exclusiveMinimum": 0 - }, - "numHwthreads": { - "description": "Number of HWThreads used", - "type": "integer", - "exclusiveMinimum": 0 - }, - "numAcc": { - "description": "Number of accelerators used", - "type": "integer", - "exclusiveMinimum": 0 - }, - "exclusive": { - "description": "Specifies how nodes are shared. 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of same user", - "type": "integer", - "minimum": 0, - "maximum": 2 - }, - "monitoringStatus": { - "description": "State of monitoring system during job run", - "type": "integer" - }, - "smt": { - "description": "SMT threads used by job", - "type": "integer" - }, - "walltime": { - "description": "Requested walltime of job in seconds", - "type": "integer", - "exclusiveMinimum": 0 - }, - "jobState": { - "description": "Final state of job", + }, + "hwthreads": { + "type": "array", + "description": "List of OS processor ids", + "items": { + "type": "integer" + } + }, + "accelerators": { + "type": "array", + "description": "List of of accelerator device ids", + "items": { + "type": "string" + } + }, + "configuration": { "type": "string", - "enum": [ - "completed", - "failed", - "cancelled", - "stopped", - "out_of_memory", - "timeout" - ] + "description": "The configuration options of the node" + } }, - "startTime": { - "description": "Start epoch time stamp in seconds", - "type": "integer", - "exclusiveMinimum": 0 + "required": [ + "hostname" + ], + "minItems": 1 + } + }, + "metaData": { + "description": "Additional information about the job", + "type": "object", + "properties": { + "jobScript": { + "type": "string", + "description": "The batch script of the job" }, - "duration": { - "description": "Duration of job in seconds", - "type": "integer", - "exclusiveMinimum": 0 + "jobName": { + "type": "string", + "description": "Slurm Job name" }, - "resources": { - "description": "Resources used by job", - "type": "array", - "items": { - "type": "object", - "properties": { - "hostname": { - "type": "string" - }, - "hwthreads": { - "type": "array", - "description": "List of OS processor ids", - "items": { - "type": "integer" - } - }, - "accelerators": { - "type": "array", - "description": "List of of accelerator device ids", - "items": { - "type": "string" - } - }, - "configuration": { - "type": "string", - "description": "The configuration options of the node" - } - }, - "required": [ - "hostname" - ], - "minItems": 1 - } + "slurmInfo": { + "type": "string", + "description": "Additional slurm infos as show by scontrol show job" + } + } + }, + "tags": { + "description": "List of tags", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "type": { + "type": "string" + } }, - "metaData": { - "description": "Additional information about the job", + "required": [ + "name", + "type" + ] + }, + "uniqueItems": true + }, + "statistics": { + "description": "Job statistic data", + "type": "object", + "properties": { + "mem_used": { + "description": "Memory capacity used (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "cpu_load": { + "description": "CPU requested core utilization (load 1m) (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "flops_any": { + "description": "Total flop rate with DP flops scaled up (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "mem_bw": { + "description": "Main memory bandwidth (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "net_bw": { + "description": "Total fast interconnect network bandwidth (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "file_bw": { + "description": "Total file IO bandwidth (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ipc": { + "description": "Instructions executed per cycle", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "cpu_user": { + "description": "CPU user active core utilization", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "flops_dp": { + "description": "Double precision flop rate", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "flops_sp": { + "description": "Single precision flops rate", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "rapl_power": { + "description": "CPU power consumption", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "acc_used": { + "description": "GPU utilization", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "acc_mem_used": { + "description": "GPU memory capacity used", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "acc_power": { + "description": "GPU power consumption", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "clock": { + "description": "Average core frequency", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "eth_read_bw": { + "description": "Ethernet read bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "eth_write_bw": { + "description": "Ethernet write bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ic_rcv_packets": { + "description": "Network interconnect read packets", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ic_send_packets": { + "description": "Network interconnect send packet", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ic_read_bw": { + "description": "Network interconnect read bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ic_write_bw": { + "description": "Network interconnect write bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "filesystems": { + "description": "Array of filesystems", + "type": "array", + "items": { "type": "object", "properties": { - "jobScript": { - "type": "string", - "description": "The batch script of the job" - }, - "jobName": { - "type": "string", - "description": "Slurm Job name" - }, - "slurmInfo": { - "type": "string", - "description": "Additional slurm infos as show by scontrol show job" - } - } - }, - "tags": { - "description": "List of tags", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "type": { - "type": "string" - } - }, - "required": [ - "name", - "type" + "name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "nfs", + "lustre", + "gpfs", + "nvme", + "ssd", + "hdd", + "beegfs" ] - }, - "uniqueItems": true - }, - "statistics": { - "description": "Job statistic data", - "type": "object", - "properties": { - "mem_used": { - "description": "Memory capacity used (required)", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "cpu_load": { - "description": "CPU requested core utilization (load 1m) (required)", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "flops_any": { - "description": "Total flop rate with DP flops scaled up (required)", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "mem_bw": { - "description": "Main memory bandwidth (required)", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "net_bw": { - "description": "Total fast interconnect network bandwidth (required)", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "file_bw": { - "description": "Total file IO bandwidth (required)", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "ipc": { - "description": "Instructions executed per cycle", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "cpu_user": { - "description": "CPU user active core utilization", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "flops_dp": { - "description": "Double precision flop rate", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "flops_sp": { - "description": "Single precision flops rate", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "rapl_power": { - "description": "CPU power consumption", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "acc_used": { - "description": "GPU utilization", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "acc_mem_used": { - "description": "GPU memory capacity used", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "acc_power": { - "description": "GPU power consumption", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "clock": { - "description": "Average core frequency", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "eth_read_bw": { - "description": "Ethernet read bandwidth", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "eth_write_bw": { - "description": "Ethernet write bandwidth", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "ic_rcv_packets": { - "description": "Network interconnect read packets", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "ic_send_packets": { - "description": "Network interconnect send packet", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "ic_read_bw": { - "description": "Network interconnect read bandwidth", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "ic_write_bw": { - "description": "Network interconnect write bandwidth", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "filesystems": { - "description": "Array of filesystems", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "nfs", - "lustre", - "gpfs", - "nvme", - "ssd", - "hdd", - "beegfs" - ] - }, - "read_bw": { - "description": "File system read bandwidth", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "write_bw": { - "description": "File system write bandwidth", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "read_req": { - "description": "File system read requests", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "write_req": { - "description": "File system write requests", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "inodes": { - "description": "File system write requests", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "accesses": { - "description": "File system open and close", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "fsync": { - "description": "File system fsync", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "create": { - "description": "File system create", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "open": { - "description": "File system open", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "close": { - "description": "File system close", - "$ref": "embedfs://job-metric-statistics.schema.json" - }, - "seek": { - "description": "File system seek", - "$ref": "embedfs://job-metric-statistics.schema.json" - } - }, - "required": [ - "name", - "type", - "read_bw", - "write_bw" - ] - }, - "minItems": 1 - } + }, + "read_bw": { + "description": "File system read bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "write_bw": { + "description": "File system write bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "read_req": { + "description": "File system read requests", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "write_req": { + "description": "File system write requests", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "inodes": { + "description": "File system write requests", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "accesses": { + "description": "File system open and close", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "fsync": { + "description": "File system fsync", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "create": { + "description": "File system create", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "open": { + "description": "File system open", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "close": { + "description": "File system close", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "seek": { + "description": "File system seek", + "$ref": "embedfs://job-metric-statistics.schema.json" + } }, "required": [ - "cpu_user", - "cpu_load", - "mem_used", - "flops_any", - "mem_bw" + "name", + "type", + "read_bw", + "write_bw" ] + }, + "minItems": 1 } - }, - "required": [ - "jobId", - "user", - "project", - "cluster", - "subCluster", - "numNodes", - "exclusive", - "startTime", - "jobState", - "duration", - "resources", - "statistics" - ] + }, + "required": [ + "cpu_user", + "cpu_load", + "mem_used", + "flops_any", + "mem_bw" + ] + } + }, + "required": [ + "jobId", + "user", + "project", + "cluster", + "subCluster", + "numNodes", + "exclusive", + "startTime", + "jobState", + "duration", + "resources", + "statistics" + ] } diff --git a/pkg/schema/schemas/job-metric-data.schema.json b/pkg/schema/schemas/job-metric-data.schema.json index 3f2b934..ad499bf 100644 --- a/pkg/schema/schemas/job-metric-data.schema.json +++ b/pkg/schema/schemas/job-metric-data.schema.json @@ -1,216 +1,216 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "embedfs://job-metric-data.schema.json", - "title": "Job metric data", - "description": "Metric data of a HPC job", - "type": "object", - "properties": { - "unit": { - "description": "Metric unit", - "$ref": "embedfs://unit.schema.json" - }, - "timestep": { - "description": "Measurement interval in seconds", - "type": "integer" - }, - "thresholds": { - "description": "Metric thresholds for specific system", - "type": "object", - "properties": { - "peak": { - "type": "number" - }, - "normal": { - "type": "number" - }, - "caution": { - "type": "number" - }, - "alert": { - "type": "number" - } - } - }, - "statisticsSeries": { - "type": "object", - "description": "Statistics series across topology", - "properties": { - "min": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "max": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "mean": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "percentiles": { - "type": "object", - "properties": { - "10": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "20": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "30": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "40": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "50": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "60": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "70": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "80": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "90": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "25": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "75": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - } - } - } - } - }, - "series": { - "type": "array", - "items": { - "type": "object", - "properties": { - "hostname": { - "type": "string" - }, - "id": { - "type": "string" - }, - "statistics": { - "type": "object", - "description": "Statistics across time dimension", - "properties": { - "avg": { - "description": "Series average", - "type": "number", - "minimum": 0 - }, - "min": { - "description": "Series minimum", - "type": "number", - "minimum": 0 - }, - "max": { - "description": "Series maximum", - "type": "number", - "minimum": 0 - } - }, - "required": [ - "avg", - "min", - "max" - ] - }, - "data": { - "type": "array", - "contains": { - "type": "number", - "minimum": 0 - }, - "minItems": 1 - } - }, - "required": [ - "hostname", - "statistics", - "data" - ] - } - } + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://job-metric-data.schema.json", + "title": "Job metric data", + "description": "Metric data of a HPC job", + "type": "object", + "properties": { + "unit": { + "description": "Metric unit", + "$ref": "embedfs://unit.schema.json" }, - "required": [ - "unit", - "timestep", - "series" - ] + "timestep": { + "description": "Measurement interval in seconds", + "type": "integer" + }, + "thresholds": { + "description": "Metric thresholds for specific system", + "type": "object", + "properties": { + "peak": { + "type": "number" + }, + "normal": { + "type": "number" + }, + "caution": { + "type": "number" + }, + "alert": { + "type": "number" + } + } + }, + "statisticsSeries": { + "type": "object", + "description": "Statistics series across topology", + "properties": { + "min": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "max": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "mean": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "percentiles": { + "type": "object", + "properties": { + "10": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "20": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "30": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "40": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "50": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "60": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "70": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "80": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "90": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "25": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "75": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + } + } + } + } + }, + "series": { + "type": "array", + "items": { + "type": "object", + "properties": { + "hostname": { + "type": "string" + }, + "id": { + "type": "string" + }, + "statistics": { + "type": "object", + "description": "Statistics across time dimension", + "properties": { + "avg": { + "description": "Series average", + "type": "number", + "minimum": 0 + }, + "min": { + "description": "Series minimum", + "type": "number", + "minimum": 0 + }, + "max": { + "description": "Series maximum", + "type": "number", + "minimum": 0 + } + }, + "required": [ + "avg", + "min", + "max" + ] + }, + "data": { + "type": "array", + "contains": { + "type": "number", + "minimum": 0 + }, + "minItems": 1 + } + }, + "required": [ + "hostname", + "statistics", + "data" + ] + } + } + }, + "required": [ + "unit", + "timestep", + "series" + ] } diff --git a/pkg/schema/schemas/job-metric-statistics.schema.json b/pkg/schema/schemas/job-metric-statistics.schema.json index 3412c23..f753ed3 100644 --- a/pkg/schema/schemas/job-metric-statistics.schema.json +++ b/pkg/schema/schemas/job-metric-statistics.schema.json @@ -1,34 +1,34 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "embedfs://job-metric-statistics.schema.json", - "title": "Job statistics", - "description": "Format specification for job metric statistics", - "type": "object", - "properties": { - "unit": { - "description": "Metric unit", - "$ref": "embedfs://unit.schema.json" - }, - "avg": { - "description": "Job metric average", - "type": "number", - "minimum": 0 - }, - "min": { - "description": "Job metric minimum", - "type": "number", - "minimum": 0 - }, - "max": { - "description": "Job metric maximum", - "type": "number", - "minimum": 0 - } + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://job-metric-statistics.schema.json", + "title": "Job statistics", + "description": "Format specification for job metric statistics", + "type": "object", + "properties": { + "unit": { + "description": "Metric unit", + "$ref": "embedfs://unit.schema.json" }, - "required": [ - "unit", - "avg", - "min", - "max" - ] + "avg": { + "description": "Job metric average", + "type": "number", + "minimum": 0 + }, + "min": { + "description": "Job metric minimum", + "type": "number", + "minimum": 0 + }, + "max": { + "description": "Job metric maximum", + "type": "number", + "minimum": 0 + } + }, + "required": [ + "unit", + "avg", + "min", + "max" + ] } diff --git a/pkg/schema/schemas/unit.schema.json b/pkg/schema/schemas/unit.schema.json index 9ee781c..c0a3df3 100644 --- a/pkg/schema/schemas/unit.schema.json +++ b/pkg/schema/schemas/unit.schema.json @@ -1,40 +1,40 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "embedfs://unit.schema.json", - "title": "Metric unit", - "description": "Format specification for job metric units", - "type": "object", - "properties": { - "base": { - "description": "Metric base unit", - "type": "string", - "enum": [ - "B", - "F", - "B/s", - "F/s", - "CPI", - "IPC", - "Hz", - "W", - "°C", - "" - ] - }, - "prefix": { - "description": "Unit prefix", - "type": "string", - "enum": [ - "K", - "M", - "G", - "T", - "P", - "E" - ] - } + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://unit.schema.json", + "title": "Metric unit", + "description": "Format specification for job metric units", + "type": "object", + "properties": { + "base": { + "description": "Metric base unit", + "type": "string", + "enum": [ + "B", + "F", + "B/s", + "F/s", + "CPI", + "IPC", + "Hz", + "W", + "°C", + "" + ] }, - "required": [ - "base" - ] + "prefix": { + "description": "Unit prefix", + "type": "string", + "enum": [ + "K", + "M", + "G", + "T", + "P", + "E" + ] + } + }, + "required": [ + "base" + ] } diff --git a/web/frontend/src/generic/helper/JobFootprint.svelte b/web/frontend/src/generic/helper/JobFootprint.svelte index 187eff9..f9bc165 100644 --- a/web/frontend/src/generic/helper/JobFootprint.svelte +++ b/web/frontend/src/generic/helper/JobFootprint.svelte @@ -9,12 +9,11 @@ --> @@ -93,7 +69,7 @@ const unit = (fmc?.unit?.prefix ? fmc.unit.prefix : "") + (fmc?.unit?.base ? fmc.unit.base : "") // Threshold / -Differences - const fmt = findJobThresholds(job, fmc); + const fmt = findJobThresholds(job, jf.stat, fmc); if (jf.name === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0); // Define basic data -> Value: Use as Provided diff --git a/web/frontend/src/generic/joblist/JobInfo.svelte b/web/frontend/src/generic/joblist/JobInfo.svelte index adacd4f..8917653 100644 --- a/web/frontend/src/generic/joblist/JobInfo.svelte +++ b/web/frontend/src/generic/joblist/JobInfo.svelte @@ -7,7 +7,7 @@ --> @@ -58,13 +76,18 @@ {job.jobId} ({job.cluster}) - + + { displayCheck ? 'Copied!' : 'Copy Job ID to Clipboard' } + {#if job.metaData?.jobName} {#if job.metaData?.jobName.length <= 25}