From c662ced7e76ef9a23c2d231cdb9d7f266761acde Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 14 Jun 2023 14:33:36 +0200 Subject: [PATCH] Initial rest endpoint for metrics data --- api/swagger.json | 155 +++++++++++++++++++++++++++++++++++++++++-- api/swagger.yaml | 117 ++++++++++++++++++++++++++++++-- internal/api/docs.go | 155 +++++++++++++++++++++++++++++++++++++++++-- internal/api/rest.go | 103 +++++++++++++++++++++++++++- pkg/schema/job.go | 2 +- 5 files changed, 509 insertions(+), 23 deletions(-) diff --git a/api/swagger.json b/api/swagger.json index 5c32a2d..2ce3519 100644 --- a/api/swagger.json +++ b/api/swagger.json @@ -12,7 +12,7 @@ "name": "MIT License", "url": "https://opensource.org/licenses/MIT" }, - "version": "0.2.0" + "version": "1" }, "host": "localhost:8080", "basePath": "/api", @@ -622,6 +622,91 @@ } } } + }, + "/jobs/{id}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "query" + ], + "summary": "Get complete job meta and metric data", + "parameters": [ + { + "type": "integer", + "description": "Database ID of Job", + "name": "id", + "in": "path", + "required": true + }, + { + "description": "Array of metric names", + "name": "request", + "in": "body", + "required": true, + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + } + ], + "responses": { + "200": { + "description": "Job resource", + "schema": { + "$ref": "#/definitions/schema.JobMeta" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "404": { + "description": "Resource not found", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "422": { + "description": "Unprocessable Entity: finding job failed: sql: no rows in result set", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + } + } + } } }, "definitions": { @@ -765,6 +850,9 @@ "type": "string", "example": "fritz" }, + "concurrentJobs": { + "$ref": "#/definitions/schema.JobLinkResultList" + }, "duration": { "description": "Duration of job in seconds (Min \u003e 0)", "type": "integer", @@ -789,6 +877,14 @@ }, "jobState": { "description": "Final state of job", + "enum": [ + "completed", + "failed", + "cancelled", + "stopped", + "timeout", + "out_of_memory" + ], "allOf": [ { "$ref": "#/definitions/schema.JobState" @@ -817,7 +913,7 @@ "example": 2 }, "numHwthreads": { - "description": "Number of HWThreads used (Min \u003e 0)", + "description": "NumCores int32 `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` // Number of HWThreads used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 20 @@ -879,6 +975,31 @@ } } }, + "schema.JobLink": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "jobId": { + "type": "integer" + } + } + }, + "schema.JobLinkResultList": { + "type": "object", + "properties": { + "count": { + "type": "integer" + }, + "items": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.JobLink" + } + } + } + }, "schema.JobMeta": { "description": "Meta data information of a HPC job.", "type": "object", @@ -893,6 +1014,9 @@ "type": "string", "example": "fritz" }, + "concurrentJobs": { + "$ref": "#/definitions/schema.JobLinkResultList" + }, "duration": { "description": "Duration of job in seconds (Min \u003e 0)", "type": "integer", @@ -917,6 +1041,14 @@ }, "jobState": { "description": "Final state of job", + "enum": [ + "completed", + "failed", + "cancelled", + "stopped", + "timeout", + "out_of_memory" + ], "allOf": [ { "$ref": "#/definitions/schema.JobState" @@ -945,7 +1077,7 @@ "example": 2 }, "numHwthreads": { - "description": "Number of HWThreads used (Min \u003e 0)", + "description": "NumCores int32 `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` // Number of HWThreads used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 20 @@ -1062,9 +1194,7 @@ "example": 2000 }, "unit": { - "description": "Metric unit (see schema/unit.schema.json)", - "type": "string", - "example": "GHz" + "$ref": "#/definitions/schema.Unit" } } }, @@ -1101,7 +1231,7 @@ "type": "object", "properties": { "id": { - "description": "The unique DB identifier of a tag", + "description": "The unique DB identifier of a tag\nThe unique DB identifier of a tag", "type": "integer" }, "name": { @@ -1115,6 +1245,17 @@ "example": "Debug" } } + }, + "schema.Unit": { + "type": "object", + "properties": { + "base": { + "type": "string" + }, + "prefix": { + "type": "string" + } + } } }, "securityDefinitions": { diff --git a/api/swagger.yaml b/api/swagger.yaml index 7d008b8..eb54e4f 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -100,6 +100,8 @@ definitions: description: The unique identifier of a cluster example: fritz type: string + concurrentJobs: + $ref: '#/definitions/schema.JobLinkResultList' duration: description: Duration of job in seconds (Min > 0) example: 43200 @@ -124,6 +126,13 @@ definitions: allOf: - $ref: '#/definitions/schema.JobState' description: Final state of job + enum: + - completed + - failed + - cancelled + - stopped + - timeout + - out_of_memory example: completed metaData: additionalProperties: @@ -143,7 +152,9 @@ definitions: minimum: 1 type: integer numHwthreads: - description: Number of HWThreads used (Min > 0) + description: NumCores int32 `json:"numCores" db:"num_cores" + example:"20" minimum:"1"` // + Number of HWThreads used (Min > 0) example: 20 minimum: 1 type: integer @@ -191,6 +202,22 @@ definitions: minimum: 1 type: integer type: object + schema.JobLink: + properties: + id: + type: integer + jobId: + type: integer + type: object + schema.JobLinkResultList: + properties: + count: + type: integer + items: + items: + $ref: '#/definitions/schema.JobLink' + type: array + type: object schema.JobMeta: description: Meta data information of a HPC job. properties: @@ -202,6 +229,8 @@ definitions: description: The unique identifier of a cluster example: fritz type: string + concurrentJobs: + $ref: '#/definitions/schema.JobLinkResultList' duration: description: Duration of job in seconds (Min > 0) example: 43200 @@ -226,6 +255,13 @@ definitions: allOf: - $ref: '#/definitions/schema.JobState' description: Final state of job + enum: + - completed + - failed + - cancelled + - stopped + - timeout + - out_of_memory example: completed metaData: additionalProperties: @@ -245,7 +281,9 @@ definitions: minimum: 1 type: integer numHwthreads: - description: Number of HWThreads used (Min > 0) + description: NumCores int32 `json:"numCores" db:"num_cores" + example:"20" minimum:"1"` // + Number of HWThreads used (Min > 0) example: 20 minimum: 1 type: integer @@ -339,9 +377,7 @@ definitions: minimum: 0 type: number unit: - description: Metric unit (see schema/unit.schema.json) - example: GHz - type: string + $ref: '#/definitions/schema.Unit' type: object schema.Resource: description: A resource used by a job @@ -367,7 +403,9 @@ definitions: description: Defines a tag using name and type. properties: id: - description: The unique DB identifier of a tag + description: |- + The unique DB identifier of a tag + The unique DB identifier of a tag type: integer name: description: Tag Name @@ -378,6 +416,13 @@ definitions: example: Debug type: string type: object + schema.Unit: + properties: + base: + type: string + prefix: + type: string + type: object host: localhost:8080 info: contact: @@ -389,7 +434,7 @@ info: name: MIT License url: https://opensource.org/licenses/MIT title: ClusterCockpit REST API - version: 0.2.0 + version: "1" paths: /jobs/: get: @@ -456,6 +501,64 @@ paths: summary: Lists all jobs tags: - query + /jobs/{id}: + get: + consumes: + - application/json + description: |- + Job to get is specified by database ID + Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'. + parameters: + - description: Database ID of Job + in: path + name: id + required: true + type: integer + - description: Array of metric names + in: body + name: request + required: true + schema: + items: + type: string + type: array + produces: + - application/json + responses: + "200": + description: Job resource + schema: + $ref: '#/definitions/schema.JobMeta' + "400": + description: Bad Request + schema: + $ref: '#/definitions/api.ErrorResponse' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/api.ErrorResponse' + "403": + description: Forbidden + schema: + $ref: '#/definitions/api.ErrorResponse' + "404": + description: Resource not found + schema: + $ref: '#/definitions/api.ErrorResponse' + "422": + description: 'Unprocessable Entity: finding job failed: sql: no rows in + result set' + schema: + $ref: '#/definitions/api.ErrorResponse' + "500": + description: Internal Server Error + schema: + $ref: '#/definitions/api.ErrorResponse' + security: + - ApiKeyAuth: [] + summary: Get complete job meta and metric data + tags: + - query /jobs/delete_job/: delete: consumes: diff --git a/internal/api/docs.go b/internal/api/docs.go index af6745f..442993a 100644 --- a/internal/api/docs.go +++ b/internal/api/docs.go @@ -628,6 +628,91 @@ const docTemplate = `{ } } } + }, + "/jobs/{id}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "query" + ], + "summary": "Get complete job meta and metric data", + "parameters": [ + { + "type": "integer", + "description": "Database ID of Job", + "name": "id", + "in": "path", + "required": true + }, + { + "description": "Array of metric names", + "name": "request", + "in": "body", + "required": true, + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + } + ], + "responses": { + "200": { + "description": "Job resource", + "schema": { + "$ref": "#/definitions/schema.JobMeta" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "404": { + "description": "Resource not found", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "422": { + "description": "Unprocessable Entity: finding job failed: sql: no rows in result set", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + } + } + } } }, "definitions": { @@ -771,6 +856,9 @@ const docTemplate = `{ "type": "string", "example": "fritz" }, + "concurrentJobs": { + "$ref": "#/definitions/schema.JobLinkResultList" + }, "duration": { "description": "Duration of job in seconds (Min \u003e 0)", "type": "integer", @@ -795,6 +883,14 @@ const docTemplate = `{ }, "jobState": { "description": "Final state of job", + "enum": [ + "completed", + "failed", + "cancelled", + "stopped", + "timeout", + "out_of_memory" + ], "allOf": [ { "$ref": "#/definitions/schema.JobState" @@ -823,7 +919,7 @@ const docTemplate = `{ "example": 2 }, "numHwthreads": { - "description": "Number of HWThreads used (Min \u003e 0)", + "description": "NumCores int32 ` + "`" + `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` + "`" + ` // Number of HWThreads used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 20 @@ -885,6 +981,31 @@ const docTemplate = `{ } } }, + "schema.JobLink": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "jobId": { + "type": "integer" + } + } + }, + "schema.JobLinkResultList": { + "type": "object", + "properties": { + "count": { + "type": "integer" + }, + "items": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.JobLink" + } + } + } + }, "schema.JobMeta": { "description": "Meta data information of a HPC job.", "type": "object", @@ -899,6 +1020,9 @@ const docTemplate = `{ "type": "string", "example": "fritz" }, + "concurrentJobs": { + "$ref": "#/definitions/schema.JobLinkResultList" + }, "duration": { "description": "Duration of job in seconds (Min \u003e 0)", "type": "integer", @@ -923,6 +1047,14 @@ const docTemplate = `{ }, "jobState": { "description": "Final state of job", + "enum": [ + "completed", + "failed", + "cancelled", + "stopped", + "timeout", + "out_of_memory" + ], "allOf": [ { "$ref": "#/definitions/schema.JobState" @@ -951,7 +1083,7 @@ const docTemplate = `{ "example": 2 }, "numHwthreads": { - "description": "Number of HWThreads used (Min \u003e 0)", + "description": "NumCores int32 ` + "`" + `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` + "`" + ` // Number of HWThreads used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 20 @@ -1068,9 +1200,7 @@ const docTemplate = `{ "example": 2000 }, "unit": { - "description": "Metric unit (see schema/unit.schema.json)", - "type": "string", - "example": "GHz" + "$ref": "#/definitions/schema.Unit" } } }, @@ -1107,7 +1237,7 @@ const docTemplate = `{ "type": "object", "properties": { "id": { - "description": "The unique DB identifier of a tag", + "description": "The unique DB identifier of a tag\nThe unique DB identifier of a tag", "type": "integer" }, "name": { @@ -1121,6 +1251,17 @@ const docTemplate = `{ "example": "Debug" } } + }, + "schema.Unit": { + "type": "object", + "properties": { + "base": { + "type": "string" + }, + "prefix": { + "type": "string" + } + } } }, "securityDefinitions": { @@ -1139,7 +1280,7 @@ const docTemplate = `{ // SwaggerInfo holds exported Swagger Info so clients can modify it var SwaggerInfo = &swag.Spec{ - Version: "0.2.0", + Version: "1", Host: "localhost:8080", BasePath: "/api", Schemes: []string{}, diff --git a/internal/api/rest.go b/internal/api/rest.go index ae9e8e9..0a4d227 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -23,6 +23,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/graph" "github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/importer" + "github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/log" @@ -68,7 +69,7 @@ func (api *RestApi) MountRoutes(r *mux.Router) { // r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut) r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet) - // r.HandleFunc("/jobs/{id}", api.getJob).Methods(http.MethodGet) + r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodGet) r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch) r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet) r.HandleFunc("/jobs/delete_job/", api.deleteJobByRequest).Methods(http.MethodDelete) @@ -142,6 +143,13 @@ type ApiTag struct { type TagJobApiRequest []*ApiTag +type GetJobApiRequest []string + +type GetJobApiResponse struct { + Meta *schema.Job + Data []*model.JobMetricWithName +} + func handleError(err error, statusCode int, rw http.ResponseWriter) { log.Warnf("REST ERROR : %s", err.Error()) rw.Header().Add("Content-Type", "application/json") @@ -301,6 +309,99 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) { } } +// getJobById godoc +// @summary Get complete job meta and metric data +// @tags query +// @description Job to get is specified by database ID +// @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'. +// @accept json +// @produce json +// @param id path int true "Database ID of Job" +// @param request body api.GetJobApiRequest true "Array of metric names" +// @success 200 {object} schema.JobMeta "Job resource" +// @failure 400 {object} api.ErrorResponse "Bad Request" +// @failure 401 {object} api.ErrorResponse "Unauthorized" +// @failure 403 {object} api.ErrorResponse "Forbidden" +// @failure 404 {object} api.ErrorResponse "Resource not found" +// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set" +// @failure 500 {object} api.ErrorResponse "Internal Server Error" +// @security ApiKeyAuth +// @router /jobs/{id} [get] +func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) { + if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) { + handleError(fmt.Errorf("missing role: %v", + auth.GetRoleString(auth.RoleApi)), http.StatusForbidden, rw) + return + } + + // Fetch job from db + id, ok := mux.Vars(r)["id"] + var job *schema.Job + var err error + if ok { + id, e := strconv.ParseInt(id, 10, 64) + if e != nil { + handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw) + return + } + + job, err = api.JobRepository.FindById(id) + } else { + handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw) + return + } + if err != nil { + handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw) + return + } + + var metrics GetJobApiRequest + if err := decode(r.Body, &metrics); err != nil { + http.Error(rw, err.Error(), http.StatusBadRequest) + return + } + + var scopes []schema.MetricScope + + if job.NumNodes == 1 { + scopes = []schema.MetricScope{"core"} + } else { + scopes = []schema.MetricScope{"node"} + } + + data, err := metricdata.LoadData(job, metrics, scopes, r.Context()) + if err != nil { + log.Warn("Error while loading job data") + return + } + + res := []*model.JobMetricWithName{} + for name, md := range data { + for scope, metric := range md { + res = append(res, &model.JobMetricWithName{ + Name: name, + Scope: scope, + Metric: metric, + }) + } + } + + log.Debugf("/api/job/%d: get job %d", id, job.JobID) + rw.Header().Add("Content-Type", "application/json") + bw := bufio.NewWriter(rw) + defer bw.Flush() + + payload := GetJobApiResponse{ + Meta: job, + Data: res, + } + + if err := json.NewEncoder(bw).Encode(payload); err != nil { + handleError(err, http.StatusInternalServerError, rw) + return + } +} + // tagJob godoc // @summary Adds one or more tags to a job // @tags add and modify diff --git a/pkg/schema/job.go b/pkg/schema/job.go index d967dd0..23ae1ef 100644 --- a/pkg/schema/job.go +++ b/pkg/schema/job.go @@ -115,7 +115,7 @@ type Unit struct { // JobStatistics model // @Description Specification for job metric statistics. type JobStatistics struct { - Unit Unit `json:"unit" example:"GHz"` + Unit Unit `json:"unit"` Avg float64 `json:"avg" example:"2500" minimum:"0"` // Job metric average Min float64 `json:"min" example:"2000" minimum:"0"` // Job metric minimum Max float64 `json:"max" example:"3000" minimum:"0"` // Job metric maximum