20 Commits

Author SHA1 Message Date
Michael Panzlaff
24d43f3540 allow /start_job/ with 0 second duration
Apparently it is possible to get this for very short jobs.
2025-02-27 15:11:07 +01:00
Michael Panzlaff
e376f97547 make swagger 2025-02-27 14:42:18 +01:00
Michael Panzlaff
f2428d3cb3 /jobs/stop_job/ change bad job_state to HTTP 422 2025-02-27 14:20:18 +01:00
Michael Panzlaff
2fdac85d31 fix: Do not allow to start a job with a state != running 2025-02-27 14:04:54 +01:00
Christoph Kluge
b731395689 Merge branch 'dev' of https://github.com/ClusterCockpit/cc-backend into dev 2025-02-26 15:21:00 +01:00
Christoph Kluge
07405e3466 fix: add missing exclusive filter handler for jobQuery 2025-02-26 15:20:58 +01:00
Jan Eitzinger
c0443cbec2 Merge pull request #334 from ClusterCockpit/add_statsRounding_dataLoader
Add stats rounding data loader
2025-02-25 13:07:03 +01:00
Christoph Kluge
633bd42036 Add rounding to archiver avg stat calculation 2025-02-24 11:10:45 +01:00
Christoph Kluge
998ef8d834 fix: use job_view_selectedMetrics config instead of iterating globalMetrics
- Caveat: Minimal Defaultset needs to be generally available on all clusters
2025-02-19 16:40:25 +01:00
Christoph Kluge
c25b076ca9 fix: separate polar plot metric list from job.footprint return 2025-02-19 16:38:35 +01:00
Christoph Kluge
f43379f365 fix: add missing factor for job power calculation, see #340 2025-02-18 18:10:39 +01:00
Christoph Kluge
58e678d72c fix: load jobView roofline on finest resolution separately by default, see #339 2025-02-17 18:24:28 +01:00
Christoph Kluge
1b043838ea fix: fix svelte js race condition on metric selection change, see #335
- only dispatch new data to statsTable on 'load-all'
2025-02-14 16:18:58 +01:00
Michael Panzlaff
f7a67c72bf fix 'unhandled case' error for core metrics 2025-02-13 17:34:45 +01:00
Christoph Kluge
c5476d08fa amend polar frontend logs 2025-02-12 10:11:54 +01:00
Christoph Kluge
8af92b1557 simplify polar plot data code, add scaling for shared jobs to polar 2025-02-11 18:38:48 +01:00
Christoph Kluge
eaa826bb8a adds centralized rounding of series stats in dataLoader
- Fixed to two digit precision
2025-02-11 10:57:04 +01:00
Christoph Kluge
140b3c371d fix undefined if system_view_selectedMetric missing
- defaults to first metric on init
- reorder gitignore
2025-02-10 15:36:14 +01:00
Michael Panzlaff
2bd7c8d51e Fix 'make -B'
Do not raise an error, if the directory already exists.
2025-02-03 16:52:50 +01:00
Christoph Kluge
1e63cdbcda fix: remove caching for footprint db field
- footprints before first worker run are cached as empty, and are permanently returned as such until app restart
- fixes Polar plot for running jobs #328
2025-01-30 17:14:17 +01:00
22 changed files with 426 additions and 189 deletions

22
.gitignore vendored
View File

@@ -1,21 +1,23 @@
/cc-backend /cc-backend
/var/job-archive
/var/*.db
/var/machine-state
/.env /.env
/config.json /config.json
/var/job-archive
/var/machine-state
/var/job.db-shm
/var/job.db-wal
/var/*.db
/var/*.txt
/web/frontend/public/build /web/frontend/public/build
/web/frontend/node_modules /web/frontend/node_modules
/.vscode/*
/archive-migration /archive-migration
/archive-manager /archive-manager
var/job.db-shm
var/job.db-wal
/internal/repository/testdata/job.db-shm
/internal/repository/testdata/job.db-wal
/.vscode/*
dist/ dist/
*.db *.db
internal/repository/testdata/job.db-shm
internal/repository/testdata/job.db-wal

View File

@@ -82,7 +82,7 @@ tags:
@ctags -R @ctags -R
$(VAR): $(VAR):
@mkdir $(VAR) @mkdir -p $(VAR)
config.json: config.json:
$(info ===> Initialize config.json file) $(info ===> Initialize config.json file)

View File

@@ -202,7 +202,7 @@
"200": { "200": {
"description": "Success message", "description": "Success message",
"schema": { "schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse" "$ref": "#/definitions/api.DefaultJobApiResponse"
} }
}, },
"400": { "400": {
@@ -272,7 +272,7 @@
"200": { "200": {
"description": "Success message", "description": "Success message",
"schema": { "schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse" "$ref": "#/definitions/api.DefaultJobApiResponse"
} }
}, },
"400": { "400": {
@@ -342,7 +342,7 @@
"200": { "200": {
"description": "Success message", "description": "Success message",
"schema": { "schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse" "$ref": "#/definitions/api.DefaultJobApiResponse"
} }
}, },
"400": { "400": {
@@ -487,7 +487,7 @@
"201": { "201": {
"description": "Job added successfully", "description": "Job added successfully",
"schema": { "schema": {
"$ref": "#/definitions/api.StartJobApiResponse" "$ref": "#/definitions/api.DefaultJobApiResponse"
} }
}, },
"400": { "400": {
@@ -581,7 +581,7 @@
} }
}, },
"422": { "422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set", "description": "Unprocessable Entity: job has already been stopped",
"schema": { "schema": {
"$ref": "#/definitions/api.ErrorResponse" "$ref": "#/definitions/api.ErrorResponse"
} }
@@ -827,6 +827,72 @@
} }
} }
}, },
"/notice/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Modifies the content of notice.txt, shown as notice box on the homepage.\nIf more than one formValue is set then only the highest priority field is used.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
"produces": [
"text/plain"
],
"tags": [
"User"
],
"summary": "Updates or empties the notice box content",
"parameters": [
{
"type": "string",
"description": "Priority 1: New content to display",
"name": "new-content",
"in": "formData"
}
],
"responses": {
"200": {
"description": "Success Response Message",
"schema": {
"type": "string"
}
},
"400": {
"description": "Bad Request",
"schema": {
"type": "string"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"type": "string"
}
},
"403": {
"description": "Forbidden",
"schema": {
"type": "string"
}
},
"422": {
"description": "Unprocessable Entity: The notice could not be updated",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/user/{id}": { "/user/{id}": {
"post": { "post": {
"security": [ "security": [
@@ -1207,6 +1273,14 @@
} }
} }
}, },
"api.DefaultJobApiResponse": {
"type": "object",
"properties": {
"msg": {
"type": "string"
}
}
},
"api.DeleteJobApiRequest": { "api.DeleteJobApiRequest": {
"type": "object", "type": "object",
"required": [ "required": [
@@ -1230,14 +1304,6 @@
} }
} }
}, },
"api.DeleteJobApiResponse": {
"type": "object",
"properties": {
"msg": {
"type": "string"
}
}
},
"api.EditMetaRequest": { "api.EditMetaRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
@@ -1324,14 +1390,6 @@
} }
} }
}, },
"api.StartJobApiResponse": {
"type": "object",
"properties": {
"msg": {
"type": "string"
}
}
},
"api.StopJobApiRequest": { "api.StopJobApiRequest": {
"type": "object", "type": "object",
"required": [ "required": [

View File

@@ -32,6 +32,11 @@ definitions:
example: Debug example: Debug
type: string type: string
type: object type: object
api.DefaultJobApiResponse:
properties:
msg:
type: string
type: object
api.DeleteJobApiRequest: api.DeleteJobApiRequest:
properties: properties:
cluster: cluster:
@@ -49,11 +54,6 @@ definitions:
required: required:
- jobId - jobId
type: object type: object
api.DeleteJobApiResponse:
properties:
msg:
type: string
type: object
api.EditMetaRequest: api.EditMetaRequest:
properties: properties:
key: key:
@@ -112,11 +112,6 @@ definitions:
scope: scope:
$ref: '#/definitions/schema.MetricScope' $ref: '#/definitions/schema.MetricScope'
type: object type: object
api.StartJobApiResponse:
properties:
msg:
type: string
type: object
api.StopJobApiRequest: api.StopJobApiRequest:
properties: properties:
cluster: cluster:
@@ -906,7 +901,7 @@ paths:
"200": "200":
description: Success message description: Success message
schema: schema:
$ref: '#/definitions/api.DeleteJobApiResponse' $ref: '#/definitions/api.DefaultJobApiResponse'
"400": "400":
description: Bad Request description: Bad Request
schema: schema:
@@ -953,7 +948,7 @@ paths:
"200": "200":
description: Success message description: Success message
schema: schema:
$ref: '#/definitions/api.DeleteJobApiResponse' $ref: '#/definitions/api.DefaultJobApiResponse'
"400": "400":
description: Bad Request description: Bad Request
schema: schema:
@@ -1000,7 +995,7 @@ paths:
"200": "200":
description: Success message description: Success message
schema: schema:
$ref: '#/definitions/api.DeleteJobApiResponse' $ref: '#/definitions/api.DefaultJobApiResponse'
"400": "400":
description: Bad Request description: Bad Request
schema: schema:
@@ -1098,7 +1093,7 @@ paths:
"201": "201":
description: Job added successfully description: Job added successfully
schema: schema:
$ref: '#/definitions/api.StartJobApiResponse' $ref: '#/definitions/api.DefaultJobApiResponse'
"400": "400":
description: Bad Request description: Bad Request
schema: schema:
@@ -1161,8 +1156,7 @@ paths:
schema: schema:
$ref: '#/definitions/api.ErrorResponse' $ref: '#/definitions/api.ErrorResponse'
"422": "422":
description: 'Unprocessable Entity: finding job failed: sql: no rows in description: 'Unprocessable Entity: job has already been stopped'
result set'
schema: schema:
$ref: '#/definitions/api.ErrorResponse' $ref: '#/definitions/api.ErrorResponse'
"500": "500":
@@ -1224,6 +1218,51 @@ paths:
summary: Adds one or more tags to a job summary: Adds one or more tags to a job
tags: tags:
- Job add and modify - Job add and modify
/notice/:
post:
consumes:
- multipart/form-data
description: |-
Modifies the content of notice.txt, shown as notice box on the homepage.
If more than one formValue is set then only the highest priority field is used.
Only accessible from IPs registered with apiAllowedIPs configuration option.
parameters:
- description: 'Priority 1: New content to display'
in: formData
name: new-content
type: string
produces:
- text/plain
responses:
"200":
description: Success Response Message
schema:
type: string
"400":
description: Bad Request
schema:
type: string
"401":
description: Unauthorized
schema:
type: string
"403":
description: Forbidden
schema:
type: string
"422":
description: 'Unprocessable Entity: The notice could not be updated'
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Updates or empties the notice box content
tags:
- User
/user/{id}: /user/{id}:
post: post:
consumes: consumes:

View File

@@ -208,7 +208,7 @@ const docTemplate = `{
"200": { "200": {
"description": "Success message", "description": "Success message",
"schema": { "schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse" "$ref": "#/definitions/api.DefaultJobApiResponse"
} }
}, },
"400": { "400": {
@@ -278,7 +278,7 @@ const docTemplate = `{
"200": { "200": {
"description": "Success message", "description": "Success message",
"schema": { "schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse" "$ref": "#/definitions/api.DefaultJobApiResponse"
} }
}, },
"400": { "400": {
@@ -348,7 +348,7 @@ const docTemplate = `{
"200": { "200": {
"description": "Success message", "description": "Success message",
"schema": { "schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse" "$ref": "#/definitions/api.DefaultJobApiResponse"
} }
}, },
"400": { "400": {
@@ -493,7 +493,7 @@ const docTemplate = `{
"201": { "201": {
"description": "Job added successfully", "description": "Job added successfully",
"schema": { "schema": {
"$ref": "#/definitions/api.StartJobApiResponse" "$ref": "#/definitions/api.DefaultJobApiResponse"
} }
}, },
"400": { "400": {
@@ -587,7 +587,7 @@ const docTemplate = `{
} }
}, },
"422": { "422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set", "description": "Unprocessable Entity: job has already been stopped",
"schema": { "schema": {
"$ref": "#/definitions/api.ErrorResponse" "$ref": "#/definitions/api.ErrorResponse"
} }
@@ -833,6 +833,72 @@ const docTemplate = `{
} }
} }
}, },
"/notice/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Modifies the content of notice.txt, shown as notice box on the homepage.\nIf more than one formValue is set then only the highest priority field is used.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
"produces": [
"text/plain"
],
"tags": [
"User"
],
"summary": "Updates or empties the notice box content",
"parameters": [
{
"type": "string",
"description": "Priority 1: New content to display",
"name": "new-content",
"in": "formData"
}
],
"responses": {
"200": {
"description": "Success Response Message",
"schema": {
"type": "string"
}
},
"400": {
"description": "Bad Request",
"schema": {
"type": "string"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"type": "string"
}
},
"403": {
"description": "Forbidden",
"schema": {
"type": "string"
}
},
"422": {
"description": "Unprocessable Entity: The notice could not be updated",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/user/{id}": { "/user/{id}": {
"post": { "post": {
"security": [ "security": [
@@ -1213,6 +1279,14 @@ const docTemplate = `{
} }
} }
}, },
"api.DefaultJobApiResponse": {
"type": "object",
"properties": {
"msg": {
"type": "string"
}
}
},
"api.DeleteJobApiRequest": { "api.DeleteJobApiRequest": {
"type": "object", "type": "object",
"required": [ "required": [
@@ -1236,14 +1310,6 @@ const docTemplate = `{
} }
} }
}, },
"api.DeleteJobApiResponse": {
"type": "object",
"properties": {
"msg": {
"type": "string"
}
}
},
"api.EditMetaRequest": { "api.EditMetaRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
@@ -1330,14 +1396,6 @@ const docTemplate = `{
} }
} }
}, },
"api.StartJobApiResponse": {
"type": "object",
"properties": {
"msg": {
"type": "string"
}
}
},
"api.StopJobApiRequest": { "api.StopJobApiRequest": {
"type": "object", "type": "object",
"required": [ "required": [

View File

@@ -757,7 +757,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
// @accept json // @accept json
// @produce json // @produce json
// @param request body schema.JobMeta true "Job to add" // @param request body schema.JobMeta true "Job to add"
// @success 201 {object} api.StartJobApiResponse "Job added successfully" // @success 201 {object} api.DefaultJobApiResponse "Job added successfully"
// @failure 400 {object} api.ErrorResponse "Bad Request" // @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized" // @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden" // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -772,9 +772,8 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
return return
} }
if req.State == "" { req.State = schema.JobStateRunning
req.State = schema.JobStateRunning
}
if err := importer.SanityChecks(&req.BaseJob); err != nil { if err := importer.SanityChecks(&req.BaseJob); err != nil {
handleError(err, http.StatusBadRequest, rw) handleError(err, http.StatusBadRequest, rw)
return return
@@ -835,7 +834,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
// @failure 401 {object} api.ErrorResponse "Unauthorized" // @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden" // @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 404 {object} api.ErrorResponse "Resource not found" // @failure 404 {object} api.ErrorResponse "Resource not found"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set" // @failure 422 {object} api.ErrorResponse "Unprocessable Entity: job has already been stopped"
// @failure 500 {object} api.ErrorResponse "Internal Server Error" // @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth // @security ApiKeyAuth
// @router /jobs/stop_job/ [post] // @router /jobs/stop_job/ [post]
@@ -871,7 +870,7 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
// @description Job to remove is specified by database ID. This will not remove the job from the job archive. // @description Job to remove is specified by database ID. This will not remove the job from the job archive.
// @produce json // @produce json
// @param id path int true "Database ID of Job" // @param id path int true "Database ID of Job"
// @success 200 {object} api.DeleteJobApiResponse "Success message" // @success 200 {object} api.DefaultJobApiResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request" // @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized" // @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden" // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -914,7 +913,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
// @accept json // @accept json
// @produce json // @produce json
// @param request body api.DeleteJobApiRequest true "All fields required" // @param request body api.DeleteJobApiRequest true "All fields required"
// @success 200 {object} api.DeleteJobApiResponse "Success message" // @success 200 {object} api.DefaultJobApiResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request" // @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized" // @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden" // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -964,7 +963,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
// @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive. // @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
// @produce json // @produce json
// @param ts path int true "Unix epoch timestamp" // @param ts path int true "Unix epoch timestamp"
// @success 200 {object} api.DeleteJobApiResponse "Success message" // @success 200 {object} api.DefaultJobApiResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request" // @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized" // @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden" // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -1004,8 +1003,13 @@ func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) { func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
// Sanity checks // Sanity checks
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != schema.JobStateRunning { if job.State != schema.JobStateRunning {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger than startTime %d and only running jobs can be stopped (state is: %s)", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix(), job.State), http.StatusBadRequest, rw) handleError(fmt.Errorf("jobId %d (id %d) on %s : job has already been stopped (state is: %s)", job.JobID, job.ID, job.Cluster, job.State), http.StatusUnprocessableEntity, rw)
return
}
if job == nil || job.StartTime.Unix() > req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
return return
} }

View File

@@ -60,12 +60,13 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
max = math.Max(max, series.Statistics.Max) max = math.Max(max, series.Statistics.Max)
} }
// Round AVG Result to 2 Digits
jobMeta.Statistics[metric] = schema.JobStatistics{ jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: schema.Unit{ Unit: schema.Unit{
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix, Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base, Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
}, },
Avg: avg / float64(job.NumNodes), Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
Min: min, Min: min,
Max: max, Max: max,
} }

View File

@@ -303,6 +303,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
// JobsFootprints is the resolver for the jobsFootprints field. // JobsFootprints is the resolver for the jobsFootprints field.
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) { func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
// NOTE: Legacy Naming! This resolver is for normalized histograms in analysis view only - *Not* related to DB "footprint" column!
return r.jobsFootprints(ctx, filter, metrics) return r.jobsFootprints(ctx, filter, metrics)
} }

View File

@@ -170,6 +170,9 @@ func LoadData(job *schema.Job,
jd.AddNodeScope("mem_bw") jd.AddNodeScope("mem_bw")
} }
// Round Resulting Stat Values
jd.RoundMetricStats()
return jd, ttl, size return jd, ttl, size
}) })

View File

@@ -440,6 +440,23 @@ func (ccms *CCMetricStore) buildQueries(
continue continue
} }
// Core -> Socket
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromCores(hwthreads)
for _, socket := range sockets {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &coreString,
TypeIds: intToStringSlice(topology.Socket[socket]),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
}
continue
}
// Core -> Node // Core -> Node
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
cores, _ := topology.GetCoresFromHWThreads(hwthreads) cores, _ := topology.GetCoresFromHWThreads(hwthreads)
@@ -627,7 +644,7 @@ func (ccms *CCMetricStore) LoadNodeData(
req.Queries = append(req.Queries, ApiQuery{ req.Queries = append(req.Queries, ApiQuery{
Hostname: node, Hostname: node,
Metric: ccms.toRemoteName(metric), Metric: ccms.toRemoteName(metric),
Resolution: 60, // Default for Node Queries Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution
}) })
} }
} }
@@ -1038,6 +1055,23 @@ func (ccms *CCMetricStore) buildNodeQueries(
continue continue
} }
// Core -> Socket
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromCores(topology.Node)
for _, socket := range sockets {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: hostname,
Aggregate: true,
Type: &coreString,
TypeIds: intToStringSlice(topology.Socket[socket]),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
}
continue
}
// Core -> Node // Core -> Node
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
cores, _ := topology.GetCoresFromHWThreads(topology.Node) cores, _ := topology.GetCoresFromHWThreads(topology.Node)

View File

@@ -217,11 +217,6 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) { func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
start := time.Now() start := time.Now()
cachekey := fmt.Sprintf("footprint:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.Footprint = cached.(map[string]float64)
return job.Footprint, nil
}
if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID). if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil { RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
@@ -238,7 +233,6 @@ func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, err
return nil, err return nil, err
} }
r.cache.Put(cachekey, job.Footprint, len(job.Footprint), 24*time.Hour)
log.Debugf("Timer FetchFootprint %s", time.Since(start)) log.Debugf("Timer FetchFootprint %s", time.Since(start))
return job.Footprint, nil return job.Footprint, nil
} }
@@ -606,8 +600,11 @@ func (r *JobRepository) UpdateEnergy(
// FIXME: Needs sum as stats type // FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt) } else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds) // Energy: Power (in Watts) * Time (in Seconds)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits // Unit: (( W * s ) / 3600) / 1000 = kWh ; Rounded to 2 nearest digits: (Energy * 100) / 100
energy = math.Round(((LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100 // Here: All-Node Metric Average * Number of Nodes * Job Runtime
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
metricNodeSum := LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes) * float64(jobMeta.Duration)
energy = math.Round(((metricNodeSum/3600)/1000)*100) / 100
} }
} else { } else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID) log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)

View File

@@ -176,6 +176,9 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs. now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs.
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor) query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
} }
if filter.Exclusive != nil {
query = query.Where("job.exclusive = ?", *filter.Exclusive)
}
if filter.State != nil { if filter.State != nil {
states := make([]string, len(filter.State)) states := make([]string, len(filter.State))
for i, val := range filter.State { for i, val := range filter.State {

View File

@@ -94,7 +94,7 @@ func RegisterFootprintWorker() {
} }
} }
// Add values rounded to 2 digits // Add values rounded to 2 digits: repo.LoadStats may return unrounded
jobMeta.Statistics[metric] = schema.JobStatistics{ jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: schema.Unit{ Unit: schema.Unit{
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix, Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,

View File

@@ -122,6 +122,38 @@ func (topo *Topology) GetSocketsFromHWThreads(
return sockets, exclusive return sockets, exclusive
} }
// GetSocketsFromCores returns the IDs of all sockets that contain at least one
// hwthread of one of the given cores. A socket is listed even if only a single
// hwthread of a single core belongs to it.
//
// The second return value is true if, for every returned socket, the number of
// matched hwthreads equals len(topo.Node) / len(topo.Socket), i.e. the given
// cores cover each of those sockets completely. It is also true when no socket
// matches at all (empty cores list or no socket information in the topology).
//
// The order of the returned socket IDs is not defined.
func (topo *Topology) GetSocketsFromCores(
	cores []int,
) (sockets []int, exclusive bool) {
	// Index every hwthread by the socket(s) listing it once, instead of
	// rescanning all sockets for every hwthread of every core. Repeated
	// entries are kept so the per-socket counts stay unchanged.
	socketsOfHWThread := map[int][]int{}
	for socket, hwthreadsInSocket := range topo.Socket {
		for _, hwthreadInSocket := range hwthreadsInSocket {
			socketsOfHWThread[hwthreadInSocket] = append(socketsOfHWThread[hwthreadInSocket], socket)
		}
	}

	// Count, per socket, how many hwthreads of the requested cores it holds.
	socketsMap := map[int]int{}
	for _, core := range cores {
		for _, hwthreadInCore := range topo.Core[core] {
			for _, socket := range socketsOfHWThread[hwthreadInCore] {
				socketsMap[socket]++
			}
		}
	}

	exclusive = true
	sockets = make([]int, 0, len(socketsMap))
	if len(socketsMap) == 0 {
		// Nothing matched. Return before the division below, which would
		// panic (integer divide by zero) for a topology without sockets.
		return sockets, exclusive
	}

	// socketsMap is non-empty here, hence topo.Socket has at least one entry.
	hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
	for socket, count := range socketsMap {
		sockets = append(sockets, socket)
		exclusive = exclusive && count == hwthreadsPerSocket
	}

	return sockets, exclusive
}
// Return a list of core IDs given a list of hwthread IDs. Even if just one // Return a list of core IDs given a list of hwthread IDs. Even if just one
// hwthread is in that core, add it to the list. If no hwthreads other than // hwthread is in that core, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the cores in the first // those in the argument list are assigned to one of the cores in the first

View File

@@ -291,6 +291,21 @@ func (jd *JobData) AddNodeScope(metric string) bool {
return true return true
} }
// RoundMetricStats replaces the statistics of every series of every metric and
// scope stored in jd with a MetricStatistics value whose Avg, Min and Max are
// the previous values rounded to two decimal places. The series are modified
// in place.
func (jd *JobData) RoundMetricStats() {
	// TODO: Make Digit-Precision Configurable? (Currently: Fixed to 2 Digits)
	roundTwoDigits := func(value float64) float64 {
		return math.Round(value*100) / 100
	}

	for _, metricScopes := range *jd {
		for _, metric := range metricScopes {
			for i := range metric.Series {
				// Copy the current values first, then overwrite the entry.
				current := metric.Series[i].Statistics
				metric.Series[i].Statistics = MetricStatistics{
					Avg: roundTwoDigits(current.Avg),
					Min: roundTwoDigits(current.Min),
					Max: roundTwoDigits(current.Max),
				}
			}
		}
	}
}
func (jm *JobMetric) AddPercentiles(ps []int) bool { func (jm *JobMetric) AddPercentiles(ps []int) bool {
if jm.StatisticsSeries == nil { if jm.StatisticsSeries == nil {
jm.AddStatisticsSeries() jm.AddStatisticsSeries()

View File

@@ -446,7 +446,7 @@
} }
}, },
"job_view_selectedMetrics": { "job_view_selectedMetrics": {
"description": "", "description": "Initial metrics shown as plots in single job view",
"type": "array", "type": "array",
"items": { "items": {
"type": "string", "type": "string",

View File

@@ -117,27 +117,41 @@
} }
`; `;
const roofQuery = gql`
query ($dbid: ID!, $selectedMetrics: [String!]!, $selectedScopes: [MetricScope!]!, $selectedResolution: Int) {
jobMetrics(id: $dbid, metrics: $selectedMetrics, scopes: $selectedScopes, resolution: $selectedResolution) {
name
scope
metric {
series {
data
}
}
}
}
`;
$: jobMetrics = queryStore({ $: jobMetrics = queryStore({
client: client, client: client,
query: query, query: query,
variables: { dbid, selectedMetrics, selectedScopes }, variables: { dbid, selectedMetrics, selectedScopes },
}); });
// Roofline: Always load roofMetrics with configured timestep (Resolution: 0)
$: roofMetrics = queryStore({
client: client,
query: roofQuery,
variables: { dbid, selectedMetrics: ["flops_any", "mem_bw"], selectedScopes: ["node"], selectedResolution: 0 },
});
// Handle Job Query on Init -> is not executed anymore // Handle Job Query on Init -> is not executed anymore
getContext("on-init")(() => { getContext("on-init")(() => {
let job = $initq.data.job; let job = $initq.data.job;
if (!job) return; if (!job) return;
const pendingMetrics = [ const pendingMetrics = [
"flops_any",
"mem_bw",
...(ccconfig[`job_view_selectedMetrics:${job.cluster}`] || ...(ccconfig[`job_view_selectedMetrics:${job.cluster}`] ||
$initq.data.globalMetrics.reduce((names, gm) => { ccconfig[`job_view_selectedMetrics`]
if (gm.availability.find((av) => av.cluster === job.cluster)) {
names.push(gm.name);
}
return names;
}, [])
), ),
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] || ...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
ccconfig[`job_view_nodestats_selectedMetrics`] ccconfig[`job_view_nodestats_selectedMetrics`]
@@ -276,12 +290,12 @@
<!-- Column 3: Job Roofline; If footprint Enabled: full width, else half width --> <!-- Column 3: Job Roofline; If footprint Enabled: full width, else half width -->
<Col xs={12} md={12} xl={5} xxl={6}> <Col xs={12} md={12} xl={5} xxl={6}>
{#if $initq.error || $jobMetrics.error} {#if $initq.error || $roofMetrics.error}
<Card body color="danger"> <Card body color="danger">
<p>Initq Error: {$initq.error?.message}</p> <p>Initq Error: {$initq.error?.message}</p>
<p>jobMetrics Error: {$jobMetrics.error?.message}</p> <p>roofMetrics (jobMetrics) Error: {$roofMetrics.error?.message}</p>
</Card> </Card>
{:else if $initq?.data && $jobMetrics?.data} {:else if $initq?.data && $roofMetrics?.data}
<Card style="height: 400px;"> <Card style="height: 400px;">
<div bind:clientWidth={roofWidth}> <div bind:clientWidth={roofWidth}>
<Roofline <Roofline
@@ -292,10 +306,10 @@
.find((c) => c.name == $initq.data.job.cluster) .find((c) => c.name == $initq.data.job.cluster)
.subClusters.find((sc) => sc.name == $initq.data.job.subCluster)} .subClusters.find((sc) => sc.name == $initq.data.job.subCluster)}
data={transformDataForRoofline( data={transformDataForRoofline(
$jobMetrics.data?.jobMetrics?.find( $roofMetrics.data?.jobMetrics?.find(
(m) => m.name == "flops_any" && m.scope == "node", (m) => m.name == "flops_any" && m.scope == "node",
)?.metric, )?.metric,
$jobMetrics.data?.jobMetrics?.find( $roofMetrics.data?.jobMetrics?.find(
(m) => m.name == "mem_bw" && m.scope == "node", (m) => m.name == "mem_bw" && m.scope == "node",
)?.metric, )?.metric,
)} )}

View File

@@ -80,6 +80,7 @@
: ccconfig.user_view_histogramMetrics || []; : ccconfig.user_view_histogramMetrics || [];
const client = getContextClient(); const client = getContextClient();
// Note: nodeMetrics are requested on configured $timestep resolution
$: mainQuery = queryStore({ $: mainQuery = queryStore({
client: client, client: client,
query: gql` query: gql`

View File

@@ -77,6 +77,7 @@
for (let sm of systemMetrics) { for (let sm of systemMetrics) {
systemUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "") systemUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
} }
if (!selectedMetric) selectedMetric = systemMetrics[0].name
} }
$: loadMetrics($initialized) $: loadMetrics($initialized)

View File

@@ -2,16 +2,12 @@
@component Polar Plot based on chartJS Radar @component Polar Plot based on chartJS Radar
Properties: Properties:
- `footprintData [Object]?`: job.footprint content, evaluated in regards to peak config in jobSummary.svelte [Default: null] - `polarMetrics [Object]?`: Metric names and scaled peak values for rendering polar plot [Default: [] ]
- `metrics [String]?`: Metric names to display as polar plot [Default: null]
- `cluster GraphQL.Cluster?`: Cluster Object of the parent job [Default: null]
- `subCluster GraphQL.SubCluster?`: SubCluster Object of the parent job [Default: null]
- `jobMetrics [GraphQL.JobMetricWithName]?`: Metric data [Default: null] - `jobMetrics [GraphQL.JobMetricWithName]?`: Metric data [Default: null]
- `height Number?`: Plot height [Default: 365] - `height Number?`: Plot height [Default: 365]
--> -->
<script> <script>
import { getContext } from 'svelte'
import { Radar } from 'svelte-chartjs'; import { Radar } from 'svelte-chartjs';
import { import {
Chart as ChartJS, Chart as ChartJS,
@@ -34,54 +30,37 @@
LineElement LineElement
); );
export let footprintData = null; export let polarMetrics = [];
export let metrics = null;
export let cluster = null;
export let subCluster = null;
export let jobMetrics = null; export let jobMetrics = null;
export let height = 350; export let height = 350;
function getLabels() { const labels = polarMetrics
if (footprintData) { .filter((m) => (m.peak != null))
return footprintData.filter(fpd => { .map(pm => pm.name)
if (!jobMetrics.find(m => m.name == fpd.name && m.scope == "node" || fpd.impact == 4)) { .sort(function (a, b) {return ((a > b) ? 1 : ((b > a) ? -1 : 0))});
console.warn(`PolarPlot: No metric data for '${fpd.name}'`)
return false function loadData(type) {
} if (!labels) {
return true console.warn("Empty 'polarMetrics' array prop! Cannot render Polar representation.")
}) return []
.map(filtered => filtered.name)
.sort(function (a, b) {
return ((a > b) ? 1 : ((b > a) ? -1 : 0));
});
} else { } else {
return metrics.filter(name => { if (type === 'avg') {
if (!jobMetrics.find(m => m.name == name && m.scope == "node")) { return getValues(getAvg)
console.warn(`PolarPlot: No metric data for '${name}'`) } else if (type === 'max') {
return false return getValues(getMax)
} } else if (type === 'min') {
return true return getValues(getMin)
}) }
.sort(function (a, b) { console.log('Unknown Type For Polar Data (must be one of [min, max, avg])')
return ((a > b) ? 1 : ((b > a) ? -1 : 0)); return []
});
} }
} }
const labels = getLabels(); // Helpers
const getMetricConfig = getContext("getMetricConfig");
const getValuesForStatGeneric = (getStat) => labels.map(name => { const getValues = (getStat) => labels.map(name => {
// TODO: Requires Scaling if Shared Job // Peak is adapted and scaled for job shared state
const peak = getMetricConfig(cluster, subCluster, name).peak const peak = polarMetrics.find(m => m.name == name).peak
const metric = jobMetrics.find(m => m.name == name && m.scope == "node")
const value = getStat(metric.metric) / peak
return value <= 1. ? value : 1.
})
const getValuesForStatFootprint = (getStat) => labels.map(name => {
// FootprintData 'Peak' is pre-scaled for Shared Jobs in JobSummary Component
const peak = footprintData.find(fpd => fpd.name === name).peak
const metric = jobMetrics.find(m => m.name == name && m.scope == "node") const metric = jobMetrics.find(m => m.name == name && m.scope == "node")
const value = getStat(metric.metric) / peak const value = getStat(metric.metric) / peak
return value <= 1. ? value : 1. return value <= 1. ? value : 1.
@@ -108,36 +87,14 @@
return avg / metric.series.length return avg / metric.series.length
} }
function loadDataGeneric(type) { // Chart JS Objects
if (type === 'avg') {
return getValuesForStatGeneric(getAvg)
} else if (type === 'max') {
return getValuesForStatGeneric(getMax)
} else if (type === 'min') {
return getValuesForStatGeneric(getMin)
}
console.log('Unknown Type For Polar Data')
return []
}
function loadDataForFootprint(type) {
if (type === 'avg') {
return getValuesForStatFootprint(getAvg)
} else if (type === 'max') {
return getValuesForStatFootprint(getMax)
} else if (type === 'min') {
return getValuesForStatFootprint(getMin)
}
console.log('Unknown Type For Polar Data')
return []
}
const data = { const data = {
labels: labels, labels: labels,
datasets: [ datasets: [
{ {
label: 'Max', label: 'Max',
data: footprintData ? loadDataForFootprint('max') : loadDataGeneric('max'), // Node Scope Only data: loadData('max'), // Node Scope Only
fill: 1, fill: 1,
backgroundColor: 'rgba(0, 0, 255, 0.25)', backgroundColor: 'rgba(0, 0, 255, 0.25)',
borderColor: 'rgb(0, 0, 255)', borderColor: 'rgb(0, 0, 255)',
@@ -148,7 +105,7 @@
}, },
{ {
label: 'Avg', label: 'Avg',
data: footprintData ? loadDataForFootprint('avg') : loadDataGeneric('avg'), // Node Scope Only data: loadData('avg'), // Node Scope Only
fill: 2, fill: 2,
backgroundColor: 'rgba(255, 210, 0, 0.25)', backgroundColor: 'rgba(255, 210, 0, 0.25)',
borderColor: 'rgb(255, 210, 0)', borderColor: 'rgb(255, 210, 0)',
@@ -159,7 +116,7 @@
}, },
{ {
label: 'Min', label: 'Min',
data: footprintData ? loadDataForFootprint('min') : loadDataGeneric('min'), // Node Scope Only data: loadData('min'), // Node Scope Only
fill: true, fill: true,
backgroundColor: 'rgba(255, 0, 0, 0.25)', backgroundColor: 'rgba(255, 0, 0, 0.25)',
borderColor: 'rgb(255, 0, 0)', borderColor: 'rgb(255, 0, 0)',

View File

@@ -30,9 +30,25 @@
export let height = "400px"; export let height = "400px";
const ccconfig = getContext("cc-config") const ccconfig = getContext("cc-config")
const showFootprint = !!ccconfig[`job_view_showFootprint`]; const globalMetrics = getContext("globalMetrics")
const showFootprintTab = !!ccconfig[`job_view_showFootprint`];
const footprintData = job?.footprint?.map((jf) => { // Metrics Configured To Be Footprints For (sub)Cluster
const clusterFootprintMetrics = getContext("clusters")
.find((c) => c.name == job.cluster)?.subClusters
.find((sc) => sc.name == job.subCluster)?.footprint || []
// Data For Polarplot Will Be Calculated Based On JobMetrics And Thresholds
const polarMetrics = globalMetrics.reduce((pms, gm) => {
if (clusterFootprintMetrics.includes(gm.name)) {
const fmt = findJobFootprintThresholds(job, gm.footprint, getContext("getMetricConfig")(job.cluster, job.subCluster, gm.name));
pms.push({ name: gm.name, peak: fmt ? fmt.peak : null });
}
return pms;
}, [])
// Prepare Job Footprint Data Based On Values Saved In Database
const jobFootprintData = !showFootprintTab ? null : job?.footprint?.map((jf) => {
const fmc = getContext("getMetricConfig")(job.cluster, job.subCluster, jf.name); const fmc = getContext("getMetricConfig")(job.cluster, job.subCluster, jf.name);
if (fmc) { if (fmc) {
// Unit // Unit
@@ -187,16 +203,16 @@
return res; return res;
}; };
$: summaryMessages = writeSummary(footprintData) $: summaryMessages = writeSummary(jobFootprintData)
*/ */
</script> </script>
<Card class="overflow-auto" style="width: {width}; height: {height}"> <Card class="overflow-auto" style="width: {width}; height: {height}">
<TabContent> <!-- on:tab={(e) => (status = e.detail)} --> <TabContent> <!-- on:tab={(e) => (status = e.detail)} -->
{#if showFootprint} {#if showFootprintTab}
<TabPane tabId="foot" tab="Footprint" active> <TabPane tabId="foot" tab="Footprint" active>
<CardBody> <CardBody>
{#each footprintData as fpd, index} {#each jobFootprintData as fpd, index}
{#if fpd.impact !== 4} {#if fpd.impact !== 4}
<div class="mb-1 d-flex justify-content-between"> <div class="mb-1 d-flex justify-content-between">
<div>&nbsp;<b>{fpd.name} ({fpd.stat})</b></div> <div>&nbsp;<b>{fpd.name} ({fpd.stat})</b></div>
@@ -237,7 +253,7 @@
>{fpd.message}</Tooltip >{fpd.message}</Tooltip
> >
</div> </div>
<Row cols={12} class="{(footprintData.length == (index + 1)) ? 'mb-0' : 'mb-2'}"> <Row cols={12} class="{(jobFootprintData.length == (index + 1)) ? 'mb-0' : 'mb-2'}">
{#if fpd.dir} {#if fpd.dir}
<Col xs="1"> <Col xs="1">
<Icon name="caret-left-fill" /> <Icon name="caret-left-fill" />
@@ -279,10 +295,10 @@
</CardBody> </CardBody>
</TabPane> </TabPane>
{/if} {/if}
<TabPane tabId="polar" tab="Polar" active={!showFootprint}> <TabPane tabId="polar" tab="Polar" active={!showFootprintTab}>
<CardBody> <CardBody>
<Polar <Polar
{footprintData} {polarMetrics}
{jobMetrics} {jobMetrics}
/> />
</CardBody> </CardBody>

View File

@@ -148,17 +148,18 @@
zoomState = {...pendingZoomState} zoomState = {...pendingZoomState}
} }
// Set selected scope to min of returned scopes // On additional scope request
if (selectedScope == "load-all") { if (selectedScope == "load-all") {
// Push scope to statsTable (Needs to be in this case, else newly selected 'Metric.svelte' renders cause statsTable race condition)
const statsTableData = $metricData.data.singleUpdate.filter((x) => x.scope !== "node")
if (statsTableData.length > 0) {
dispatch("more-loaded", statsTableData);
}
// Set selected scope to min of returned scopes
selectedScope = minScope(scopes) selectedScope = minScope(scopes)
nodeOnly = (selectedScope == "node") // "node" still only scope after load-all nodeOnly = (selectedScope == "node") // "node" still only scope after load-all
} }
const statsTableData = $metricData.data.singleUpdate.filter((x) => x.scope !== "node")
if (statsTableData.length > 0) {
dispatch("more-loaded", statsTableData);
}
patternMatches = statsPattern.exec(selectedScope) patternMatches = statsPattern.exec(selectedScope)
if (!patternMatches) { if (!patternMatches) {