From f39e04979b0d2b74d90c63a28364cc4a5b53f8a3 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 20 Dec 2024 09:29:46 +0100 Subject: [PATCH] Update, cleanup, and reformat --- README.md | 14 +- interfaces/graphql/README.md | 3 - interfaces/graphql/schema.graphqls | 303 -------- interfaces/lineprotocol/README.md | 88 +-- interfaces/rest/README.md | 2 +- interfaces/rest/swagger.json | 1030 +++++++++++++++++++++++----- interfaces/rest/swagger.yaml | 733 ++++++++++++++++---- schemas/README.md | 4 - schemas/jobs-sqlite.sql | 71 -- 9 files changed, 1528 insertions(+), 720 deletions(-) delete mode 100644 interfaces/graphql/README.md delete mode 100644 interfaces/graphql/schema.graphqls delete mode 100644 schemas/README.md delete mode 100644 schemas/jobs-sqlite.sql diff --git a/README.md b/README.md index 4d7cbf3..4b313b4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ # Specifications for datastructures, interfaces, and file formats -* **[Generic datastructure specifications](https://github.com/ClusterCockpit/cc-specifications/tree/master/datastructures)**: To be used in applications, as payloads in apis, or as file formats. -* **[APIs offered by ClusterCockpit](https://github.com/ClusterCockpit/cc-specifications/tree/master/interfaces)**: REST, GraphQL, and Influx Line protocol descriptions. -* **[HPC Job-Archive specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive)**: A directory tree and file format description for a file based HPC Job Performance archive. -* **[SQL Jobs Table Schema](https://github.com/ClusterCockpit/cc-specifications/tree/master/schemas)**: The SQL database schema used in cc-backend. +* **[Generic datastructure +specifications](https://github.com/ClusterCockpit/cc-specifications/tree/master/datastructures)**: +To be used in applications, as payloads in apis, or as file formats. 
+* **[APIs offered by +ClusterCockpit](https://github.com/ClusterCockpit/cc-specifications/tree/master/interfaces)**: +REST, and Influx Line protocol descriptions. +* **[HPC Job-Archive +specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive)**: +A directory tree and file format description for a file based HPC Job +Performance archive. diff --git a/interfaces/graphql/README.md b/interfaces/graphql/README.md deleted file mode 100644 index 73cac45..0000000 --- a/interfaces/graphql/README.md +++ /dev/null @@ -1,3 +0,0 @@ -## GraphQL Schema - -This schema is intended for communication between web-frontend and web-backend. diff --git a/interfaces/graphql/schema.graphqls b/interfaces/graphql/schema.graphqls deleted file mode 100644 index 71a5373..0000000 --- a/interfaces/graphql/schema.graphqls +++ /dev/null @@ -1,303 +0,0 @@ -scalar Time -scalar Any - -scalar NullableFloat -scalar MetricScope -scalar JobState - -type Job { - id: ID! - jobId: Int! - user: String! - project: String! - cluster: String! - subCluster: String! - startTime: Time! - duration: Int! - walltime: Int! - numNodes: Int! - numHWThreads: Int! - numAcc: Int! - SMT: Int! - exclusive: Int! - partition: String! - arrayJobId: Int! - monitoringStatus: Int! - state: JobState! - tags: [Tag!]! - resources: [Resource!]! - concurrentJobs: JobLinkResultList - - metaData: Any - userData: User -} - -type JobLink { - id: ID! - jobId: Int! -} - -type Cluster { - name: String! - partitions: [String!]! # Slurm partitions - metricConfig: [MetricConfig!]! - subClusters: [SubCluster!]! # Hardware partitions/subclusters -} - -type SubCluster { - name: String! - nodes: String! - numberOfNodes: Int! - processorType: String! - socketsPerNode: Int! - coresPerSocket: Int! - threadsPerCore: Int! - flopRateScalar: MetricValue! - flopRateSimd: MetricValue! - memoryBandwidth: MetricValue! - topology: Topology! -} - -type MetricValue { - unit: Unit! - value: Float! 
-} - -type Topology { - node: [Int!] - socket: [[Int!]!] - memoryDomain: [[Int!]!] - die: [[Int!]!] - core: [[Int!]!] - accelerators: [Accelerator!] -} - -type Accelerator { - id: String! - type: String! - model: String! -} - -type SubClusterConfig { - name: String! - peak: Float - normal: Float - caution: Float - alert: Float - remove: Boolean -} - -type MetricConfig { - name: String! - unit: Unit! - scope: MetricScope! - aggregation: String! - timestep: Int! - peak: Float! - normal: Float - caution: Float! - alert: Float! - subClusters: [SubClusterConfig!]! -} - -type Tag { - id: ID! - type: String! - name: String! -} - -type Resource { - hostname: String! - hwthreads: [Int!] - accelerators: [String!] - configuration: String -} - -type JobMetricWithName { - name: String! - scope: MetricScope! - metric: JobMetric! -} - -type JobMetric { - unit: Unit - timestep: Int! - series: [Series!] - statisticsSeries: StatsSeries -} - -type Series { - hostname: String! - id: String - statistics: MetricStatistics - data: [NullableFloat!]! -} - -type Unit { - base: String! - prefix: String -} - -type MetricStatistics { - avg: Float! - min: Float! - max: Float! -} - -type StatsSeries { - mean: [NullableFloat!]! - min: [NullableFloat!]! - max: [NullableFloat!]! -} - -type MetricFootprints { - metric: String! - data: [NullableFloat!]! -} - -type Footprints { - nodehours: [NullableFloat!]! - metrics: [MetricFootprints!]! -} - -enum Aggregate { USER, PROJECT, CLUSTER } -enum Weights { NODE_COUNT, NODE_HOURS } - -type NodeMetrics { - host: String! - subCluster: String! - metrics: [JobMetricWithName!]! -} - -type Count { - name: String! - count: Int! -} - -type User { - username: String! - name: String! - email: String! -} - -type Query { - clusters: [Cluster!]! # List of all clusters - tags: [Tag!]! # List of all tags - - user(username: String!): User - allocatedNodes(cluster: String!): [Count!]! 
- - job(id: ID!): Job - jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]! - jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints - - jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList! - jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]! - jobsCount(filter: [JobFilter]!, groupBy: Aggregate!, weight: Weights, limit: Int): [Count!]! - - rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]! - - nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]! -} - -type Mutation { - createTag(type: String!, name: String!): Tag! - deleteTag(id: ID!): ID! - addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]! - removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]! - - updateConfiguration(name: String!, value: String!): String -} - -type IntRangeOutput { from: Int!, to: Int! } -type TimeRangeOutput { from: Time!, to: Time! } - -input JobFilter { - tags: [ID!] - jobId: StringInput - arrayJobId: Int - user: StringInput - project: StringInput - jobName: StringInput - cluster: StringInput - partition: StringInput - duration: IntRange - - minRunningFor: Int - - numNodes: IntRange - numAccelerators: IntRange - numHWThreads: IntRange - - startTime: TimeRange - state: [JobState!] - flopsAnyAvg: FloatRange - memBwAvg: FloatRange - loadAvg: FloatRange - memUsedMax: FloatRange - - exclusive: Int - sharedNode: StringInput - selfJobId: StringInput - selfStartTime: Time - selfDuration: Int -} - -input OrderByInput { - field: String! - order: SortDirectionEnum! = ASC -} - -enum SortDirectionEnum { - DESC - ASC -} - -input StringInput { - eq: String - neq: String - contains: String - startsWith: String - endsWith: String - in: [String!] -} - -input IntRange { from: Int!, to: Int! } -input FloatRange { from: Float!, to: Float! 
} -input TimeRange { from: Time, to: Time } - -type JobResultList { - items: [Job!]! - offset: Int - limit: Int - count: Int -} - -type JobLinkResultList { - items: [JobLink!]! - count: Int -} - -type HistoPoint { - count: Int! - value: Int! -} - -type JobsStatistics { - id: ID! # If `groupBy` was used, ID of the user/project/cluster - name: String! # if User-Statistics: Given Name of Account (ID) Owner - totalJobs: Int! # Number of jobs - runningJobs: Int! # Number of running jobs - shortJobs: Int! # Number of jobs with a duration of less than duration - totalWalltime: Int! # Sum of the duration of all matched jobs in hours - totalNodeHours: Int! # Sum of the node hours of all matched jobs - totalCoreHours: Int! # Sum of the core hours of all matched jobs - totalAccHours: Int! # Sum of the gpu hours of all matched jobs - histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value - histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes -} - -input PageRequest { - itemsPerPage: Int! - page: Int! -} diff --git a/interfaces/lineprotocol/README.md b/interfaces/lineprotocol/README.md index 2c68650..df97182 100644 --- a/interfaces/lineprotocol/README.md +++ b/interfaces/lineprotocol/README.md @@ -1,63 +1,75 @@ -# Overview +# InfluxData line-protocol flavor + +## Overview ClusterCockpit uses the [InfluxData line-protocol](https://docs.influxdata.com/influxdb/v2.1/reference/syntax/line-protocol/) for transferring messages between its components. The line-protocol is a text-based representation of a metric/event with a value, time and describing tags. All metrics/events have the following format (if written to `stdout`): - ``` , ``` -where `` and `` are comma-separated lists of `key=value` entries. In a mind-model, think about tags as `indices` in the database for faster lookup and the `` as values. +where `` and `` are comma-separated lists of `key=value` +entries. 
In a mind-model, think about tags as `indices` in the database for +faster lookup and the `` as values. -**Remark**: In the first iteration, we only sent metrics (number values) but we had to extend the specification to messages with different meanings. The below text was changes accordingly. The update is downward-compatible, so for metrics (number values), nothing changed. +**Remark**: In the first iteration, we only sent metrics (number values) but we +had to extend the specification to messages with different meanings. The below +text was changed accordingly. The update is downward-compatible, so for metrics +(number values), nothing changed. +## Line-protocol in the ClusterCockpit ecosystem -# Line-protocol in the ClusterCockpit ecosystem +In ClusterCockpit we limit the flexibility of the InfluxData line-protocol +slightly. The idea is to keep the format evaluatable by different components. -In ClusterCockpit we limit the flexibility of the InfluxData line-protocol slightly. The idea is to keep the format evaluatable by different components. +Each message is identifiable by the `measurement` (= metric name), the +`hostname`, the `type` and, if required, a `type-id`. -Each message is identifiable by the `measurement` (= metric name), the `hostname`, the `type` and, if required, a `type-id`. +### Mandatory tags per message - - -## Mandatory tags per message: * `hostname` * `type` - - `node` - - `socket` - - `die` - - `memoryDomain` - - `llc` - - `core` - - `hwthread` - - `accelerator` + * `node` + * `socket` + * `die` + * `memoryDomain` + * `llc` + * `core` + * `hwthread` + * `accelerator` * `type-id` for further specifying the type like CPU socket or HW Thread identifier Although no `type-id` is required if `type=node`, it is recommended to send `type=node,type-id=0`. -### Optional tags depending on the message: +#### Optional tags depending on the message -In some cases, optional tags are required like `filesystem`, `device` or `version`. 
While you are free to do that, the ClusterCockpit components in the stack above will recognize `stype` (= "sub type") and `stype-id`. So `filesystem=/homes` should be better specified as `stype=filesystem,stype-id=/homes`. +In some cases, optional tags are required like `filesystem`, `device` or +`version`. While you are free to do that, the ClusterCockpit components in the +stack above will recognize `stype` (= "sub type") and `stype-id`. So +`filesystem=/homes` should be better specified as +`stype=filesystem,stype-id=/homes`. -## Mandatory fields per measurement: +### Mandatory fields per measurement -- Metric: The field key is always `value` -- Event: The field key is always `event` -- Log message: The field key is always `log` -- Control message: The field key is always `log` +* Metric: The field key is always `value` +* Event: The field key is always `event` +* Log message: The field key is always `log` +* Control message: The field key is always `control` No other field keys are evaluated by the ClusterCockpit ecosystem. +### Message types -## Message types +There exist different message types in the ClusterCockpit ecosystem, all +specified using the InfluxData line-protocol. -There exist different message types in the ClusterCockpit ecosystem, all specified using the InfluxData line-protocol. 
- -### Metrics +#### Metrics **Identification:** `value=X` field with `X` being a number -While the measurements (metric names) can be chosen freely, there is a basic set of measurements which should be present as long as you navigate in the ClusterCockpit ecosystem +While the measurements (metric names) can be chosen freely, there is a basic set +of measurements which should be present as long as you navigate in the +ClusterCockpit ecosystem * `flops_sp`: Single-precision floating point rate in `Flops/s` * `flops_dp`: Double-precision floating point rate in `Flops/s` @@ -73,19 +85,17 @@ While the measurements (metric names) can be chosen freely, there is a basic set For the whole list, see [job-data schema](../../datastructures/job-data.schema.json) - -### Events +#### Events **Identification:** `event="X"` field with `"X"` being a string -### Controls +#### Controls -**Identification:** -- `control="X"` field with `"X"` being a string -- `method` tag is either `GET` or `PUT` +**Identification:** -### Logs +* `control="X"` field with `"X"` being a string +* `method` tag is either `GET` or `PUT` + +#### Logs **Identification:** `log="X"` field with `"X"` being a string - - diff --git a/interfaces/rest/README.md b/interfaces/rest/README.md index cb701bc..4e4744e 100644 --- a/interfaces/rest/README.md +++ b/interfaces/rest/README.md @@ -1 +1 @@ -## REST API interfaces +# REST API interfaces diff --git a/interfaces/rest/swagger.json b/interfaces/rest/swagger.json index 87a7de5..3b59b5e 100644 --- a/interfaces/rest/swagger.json +++ b/interfaces/rest/swagger.json @@ -12,11 +12,68 @@ "name": "MIT License", "url": "https://opensource.org/licenses/MIT" }, - "version": "1" + "version": "1.0.0" }, "host": "localhost:8080", "basePath": "/api", "paths": { + "/clusters/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Get a list of all cluster configs. 
Specific cluster can be requested using query parameter.", + "produces": [ + "application/json" + ], + "tags": [ + "Cluster query" + ], + "summary": "Lists all cluster configs", + "parameters": [ + { + "type": "string", + "description": "Job Cluster", + "name": "cluster", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Array of clusters", + "schema": { + "$ref": "#/definitions/api.GetClustersApiResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + } + } + } + }, "/jobs/": { "get": { "security": [ @@ -29,7 +86,7 @@ "application/json" ], "tags": [ - "query" + "Job query" ], "summary": "Lists all jobs", "parameters": [ @@ -127,7 +184,7 @@ "application/json" ], "tags": [ - "remove" + "Job remove" ], "summary": "Remove a job from the sql database", "parameters": [ @@ -199,7 +256,7 @@ "application/json" ], "tags": [ - "remove" + "Job remove" ], "summary": "Remove a job from the sql database", "parameters": [ @@ -269,7 +326,7 @@ "application/json" ], "tags": [ - "remove" + "Job remove" ], "summary": "Remove a job from the sql database", "parameters": [ @@ -327,6 +384,76 @@ } } }, + "/jobs/edit_meta/{id}": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Edit key value pairs in job metadata json\nIf a key already exists its content will be overwritten", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "Job add and modify" + ], + "summary": "Edit meta-data json", + "parameters": [ + { + "type": "integer", + "description": "Job Database ID", + "name": "id", + "in": 
"path", + "required": true + }, + { + "description": "Key-value pair to add", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/api.EditMetaRequest" + } + } + ], + "responses": { + "200": { + "description": "Updated job resource", + "schema": { + "$ref": "#/definitions/schema.Job" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "404": { + "description": "Job does not exist", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + } + } + } + }, "/jobs/start_job/": { "post": { "security": [ @@ -342,7 +469,7 @@ "application/json" ], "tags": [ - "add and modify" + "Job add and modify" ], "summary": "Adds a new job as \"running\"", "parameters": [ @@ -408,7 +535,7 @@ "application/json" ], "tags": [ - "add and modify" + "Job add and modify" ], "summary": "Marks job as completed and triggers archiving", "parameters": [ @@ -468,88 +595,6 @@ } } }, - "/jobs/stop_job/{id}": { - "post": { - "security": [ - { - "ApiKeyAuth": [] - } - ], - "description": "Job to stop is specified by database ID. 
Only stopTime and final state are required in request body.\nReturns full job resource information according to 'JobMeta' scheme.", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "add and modify" - ], - "summary": "Marks job as completed and triggers archiving", - "parameters": [ - { - "type": "integer", - "description": "Database ID of Job", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "stopTime and final state in request body", - "name": "request", - "in": "body", - "required": true, - "schema": { - "$ref": "#/definitions/api.StopJobApiRequest" - } - } - ], - "responses": { - "200": { - "description": "Job resource", - "schema": { - "$ref": "#/definitions/schema.JobMeta" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "$ref": "#/definitions/api.ErrorResponse" - } - }, - "401": { - "description": "Unauthorized", - "schema": { - "$ref": "#/definitions/api.ErrorResponse" - } - }, - "403": { - "description": "Forbidden", - "schema": { - "$ref": "#/definitions/api.ErrorResponse" - } - }, - "404": { - "description": "Resource not found", - "schema": { - "$ref": "#/definitions/api.ErrorResponse" - } - }, - "422": { - "description": "Unprocessable Entity: finding job failed: sql: no rows in result set", - "schema": { - "$ref": "#/definitions/api.ErrorResponse" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "$ref": "#/definitions/api.ErrorResponse" - } - } - } - } - }, "/jobs/tag_job/{id}": { "post": { "security": [ @@ -557,7 +602,7 @@ "ApiKeyAuth": [] } ], - "description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nIf tagged job is already finished: Tag will be written directly to respective archive files.", + "description": "Adds tag(s) to a job specified by DB ID. 
Name and Type of Tag(s) can be chosen freely.\nTag Scope for frontend visibility will default to \"global\" if none entered, other options: \"admin\" or specific username.\nIf tagged job is already finished: Tag will be written directly to respective archive files.", "consumes": [ "application/json" ], @@ -565,7 +610,7 @@ "application/json" ], "tags": [ - "add and modify" + "Job add and modify" ], "summary": "Adds one or more tags to a job", "parameters": [ @@ -624,6 +669,80 @@ } }, "/jobs/{id}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.", + "produces": [ + "application/json" + ], + "tags": [ + "Job query" + ], + "summary": "Get job meta and optional all metric data", + "parameters": [ + { + "type": "integer", + "description": "Database ID of Job", + "name": "id", + "in": "path", + "required": true + }, + { + "type": "boolean", + "description": "Include all available metrics", + "name": "all-metrics", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Job resource", + "schema": { + "$ref": "#/definitions/api.GetJobApiResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "404": { + "description": "Resource not found", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "422": { + "description": "Unprocessable Entity: finding job failed: sql: no rows in result set", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + } + } + }, 
"post": { "security": [ { @@ -638,9 +757,9 @@ "application/json" ], "tags": [ - "query" + "Job query" ], - "summary": "Get complete job meta and metric data", + "summary": "Get job meta and configurable metric data", "parameters": [ { "type": "integer", @@ -707,9 +826,367 @@ } } } + }, + "/user/{id}": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Modifies user defined by username (id) in one of four possible ways.\nIf more than one formValue is set then only the highest priority field is used.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.", + "consumes": [ + "multipart/form-data" + ], + "produces": [ + "text/plain" + ], + "tags": [ + "User" + ], + "summary": "Updates an existing user", + "parameters": [ + { + "type": "string", + "description": "Database ID of User", + "name": "id", + "in": "path", + "required": true + }, + { + "enum": [ + "admin", + "support", + "manager", + "user", + "api" + ], + "type": "string", + "description": "Priority 1: Role to add", + "name": "add-role", + "in": "formData" + }, + { + "enum": [ + "admin", + "support", + "manager", + "user", + "api" + ], + "type": "string", + "description": "Priority 2: Role to remove", + "name": "remove-role", + "in": "formData" + }, + { + "type": "string", + "description": "Priority 3: Project to add", + "name": "add-project", + "in": "formData" + }, + { + "type": "string", + "description": "Priority 4: Project to remove", + "name": "remove-project", + "in": "formData" + } + ], + "responses": { + "200": { + "description": "Success Response Message", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "type": "string" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "type": "string" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "type": "string" + } + }, + "422": { + "description": "Unprocessable Entity: The user could not be updated", + "schema": { + 
"type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/users/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.", + "produces": [ + "application/json" + ], + "tags": [ + "User" + ], + "summary": "Returns a list of users", + "parameters": [ + { + "type": "boolean", + "description": "If returned list should contain all users or only users with additional special roles", + "name": "not-just-user", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "List of users returned successfully", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/api.ApiReturnedUser" + } + } + }, + "400": { + "description": "Bad Request", + "schema": { + "type": "string" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "type": "string" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "User specified in form data will be saved to database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.", + "consumes": [ + "multipart/form-data" + ], + "produces": [ + "text/plain" + ], + "tags": [ + "User" + ], + "summary": "Adds a new user", + "parameters": [ + { + "type": "string", + "description": "Unique user ID", + "name": "username", + "in": "formData", + "required": true + }, + { + "type": "string", + "description": "User password", + "name": "password", + "in": "formData", + "required": true + }, + { + "enum": [ + "admin", + "support", + "manager", + 
"user", + "api" + ], + "type": "string", + "description": "User role", + "name": "role", + "in": "formData", + "required": true + }, + { + "type": "string", + "description": "Managed project, required for new manager role user", + "name": "project", + "in": "formData" + }, + { + "type": "string", + "description": "Users name", + "name": "name", + "in": "formData" + }, + { + "type": "string", + "description": "Users email", + "name": "email", + "in": "formData" + } + ], + "responses": { + "200": { + "description": "Success Response", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "type": "string" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "type": "string" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "type": "string" + } + }, + "422": { + "description": "Unprocessable Entity: creating user failed", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "delete": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "User defined by username in form data will be deleted from database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.", + "consumes": [ + "multipart/form-data" + ], + "produces": [ + "text/plain" + ], + "tags": [ + "User" + ], + "summary": "Deletes a user", + "parameters": [ + { + "type": "string", + "description": "User ID to delete", + "name": "username", + "in": "formData", + "required": true + } + ], + "responses": { + "200": { + "description": "User deleted successfully" + }, + "400": { + "description": "Bad Request", + "schema": { + "type": "string" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "type": "string" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "type": "string" + } + }, + "422": { + "description": "Unprocessable Entity: deleting user failed", + "schema": { + 
"type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } } }, "definitions": { + "api.ApiReturnedUser": { + "type": "object", + "properties": { + "email": { + "type": "string" + }, + "name": { + "type": "string" + }, + "projects": { + "type": "array", + "items": { + "type": "string" + } + }, + "roles": { + "type": "array", + "items": { + "type": "string" + } + }, + "username": { + "type": "string" + } + } + }, "api.ApiTag": { "type": "object", "properties": { @@ -718,6 +1195,11 @@ "type": "string", "example": "Testjob" }, + "scope": { + "description": "Tag Scope for Frontend Display", + "type": "string", + "example": "global" + }, "type": { "description": "Tag Type", "type": "string", @@ -756,6 +1238,19 @@ } } }, + "api.EditMetaRequest": { + "type": "object", + "properties": { + "key": { + "type": "string", + "example": "jobScript" + }, + "value": { + "type": "string", + "example": "bash script" + } + } + }, "api.ErrorResponse": { "type": "object", "properties": { @@ -769,6 +1264,18 @@ } } }, + "api.GetClustersApiResponse": { + "type": "object", + "properties": { + "clusters": { + "description": "Array of clusters", + "type": "array", + "items": { + "$ref": "#/definitions/schema.Cluster" + } + } + } + }, "api.GetJobApiResponse": { "type": "object", "properties": { @@ -820,9 +1327,8 @@ "api.StartJobApiResponse": { "type": "object", "properties": { - "id": { - "description": "Database ID of new job", - "type": "integer" + "msg": { + "type": "string" } } }, @@ -834,17 +1340,14 @@ ], "properties": { "cluster": { - "description": "Cluster of job", "type": "string", "example": "fritz" }, "jobId": { - "description": "Cluster Job ID of job", "type": "integer", "example": 123000 }, "jobState": { - "description": "Final job state", "allOf": [ { "$ref": "#/definitions/schema.JobState" @@ -853,28 +1356,58 @@ "example": "completed" }, "startTime": { - "description": "Start Time of job as epoch", "type": 
"integer", "example": 1649723812 }, "stopTime": { - "description": "Stop Time of job as epoch", "type": "integer", "example": 1649763839 } } }, + "schema.Accelerator": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "type": { + "type": "string" + } + } + }, + "schema.Cluster": { + "type": "object", + "properties": { + "metricConfig": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.MetricConfig" + } + }, + "name": { + "type": "string" + }, + "subClusters": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.SubCluster" + } + } + } + }, "schema.Job": { "description": "Information of a HPC job.", "type": "object", "properties": { "arrayJobId": { - "description": "The unique identifier of an array job", "type": "integer", "example": 123000 }, "cluster": { - "description": "The unique identifier of a cluster", "type": "string", "example": "fritz" }, @@ -882,29 +1415,39 @@ "$ref": "#/definitions/schema.JobLinkResultList" }, "duration": { - "description": "Duration of job in seconds (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 43200 }, + "energy": { + "type": "number" + }, + "energyFootprint": { + "type": "object", + "additionalProperties": { + "type": "number" + } + }, "exclusive": { - "description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user", "type": "integer", "maximum": 2, "minimum": 0, "example": 1 }, + "footprint": { + "type": "object", + "additionalProperties": { + "type": "number" + } + }, "id": { - "description": "The unique identifier of a job in the database", "type": "integer" }, "jobId": { - "description": "The unique identifier of a job", "type": "integer", "example": 123000 }, "jobState": { - "description": "Final state of job", "enum": [ "completed", "failed", @@ -921,82 +1464,68 @@ "example": "completed" }, "metaData": { - 
"description": "Additional information about the job", "type": "object", "additionalProperties": { "type": "string" } }, "monitoringStatus": { - "description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull", "type": "integer", "maximum": 3, "minimum": 0, "example": 1 }, "numAcc": { - "description": "Number of accelerators used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 2 }, "numHwthreads": { - "description": "NumCores int32 `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` // Number of HWThreads used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 20 }, "numNodes": { - "description": "Number of nodes used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 2 }, "partition": { - "description": "The Slurm partition to which the job was submitted", "type": "string", "example": "main" }, "project": { - "description": "The unique identifier of a project", "type": "string", "example": "abcd200" }, "resources": { - "description": "Resources used by job", "type": "array", "items": { "$ref": "#/definitions/schema.Resource" } }, "smt": { - "description": "SMT threads used by job", "type": "integer", "example": 4 }, "startTime": { - "description": "Start time as 'time.Time' data type", "type": "string" }, "subCluster": { - "description": "The unique identifier of a sub cluster", "type": "string", "example": "main" }, "tags": { - "description": "List of tags", "type": "array", "items": { "$ref": "#/definitions/schema.Tag" } }, "user": { - "description": "The unique identifier of a user", "type": "string", "example": "abcd100h" }, "walltime": { - "description": "Requested walltime of job in seconds (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 86400 @@ -1033,12 +1562,10 @@ "type": "object", "properties": { "arrayJobId": { - "description": "The unique identifier of an array job", "type": "integer", "example": 
123000 }, "cluster": { - "description": "The unique identifier of a cluster", "type": "string", "example": "fritz" }, @@ -1046,29 +1573,39 @@ "$ref": "#/definitions/schema.JobLinkResultList" }, "duration": { - "description": "Duration of job in seconds (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 43200 }, + "energy": { + "type": "number" + }, + "energyFootprint": { + "type": "object", + "additionalProperties": { + "type": "number" + } + }, "exclusive": { - "description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user", "type": "integer", "maximum": 2, "minimum": 0, "example": 1 }, + "footprint": { + "type": "object", + "additionalProperties": { + "type": "number" + } + }, "id": { - "description": "The unique identifier of a job in the database", "type": "integer" }, "jobId": { - "description": "The unique identifier of a job", "type": "integer", "example": 123000 }, "jobState": { - "description": "Final state of job", "enum": [ "completed", "failed", @@ -1085,91 +1622,76 @@ "example": "completed" }, "metaData": { - "description": "Additional information about the job", "type": "object", "additionalProperties": { "type": "string" } }, "monitoringStatus": { - "description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull", "type": "integer", "maximum": 3, "minimum": 0, "example": 1 }, "numAcc": { - "description": "Number of accelerators used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 2 }, "numHwthreads": { - "description": "NumCores int32 `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` // Number of HWThreads used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 20 }, "numNodes": { - "description": "Number of nodes used (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 2 }, "partition": { - 
"description": "The Slurm partition to which the job was submitted", "type": "string", "example": "main" }, "project": { - "description": "The unique identifier of a project", "type": "string", "example": "abcd200" }, "resources": { - "description": "Resources used by job", "type": "array", "items": { "$ref": "#/definitions/schema.Resource" } }, "smt": { - "description": "SMT threads used by job", "type": "integer", "example": 4 }, "startTime": { - "description": "Start epoch time stamp in seconds (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 1649723812 }, "statistics": { - "description": "Metric statistics of job", "type": "object", "additionalProperties": { "$ref": "#/definitions/schema.JobStatistics" } }, "subCluster": { - "description": "The unique identifier of a sub cluster", "type": "string", "example": "main" }, "tags": { - "description": "List of tags", "type": "array", "items": { "$ref": "#/definitions/schema.Tag" } }, "user": { - "description": "The unique identifier of a user", "type": "string", "example": "abcd100h" }, "walltime": { - "description": "Requested walltime of job in seconds (Min \u003e 0)", "type": "integer", "minimum": 1, "example": 86400 @@ -1246,6 +1768,53 @@ } } }, + "schema.MetricConfig": { + "type": "object", + "properties": { + "aggregation": { + "type": "string" + }, + "alert": { + "type": "number" + }, + "caution": { + "type": "number" + }, + "energy": { + "type": "string" + }, + "footprint": { + "type": "string" + }, + "lowerIsBetter": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "normal": { + "type": "number" + }, + "peak": { + "type": "number" + }, + "scope": { + "$ref": "#/definitions/schema.MetricScope" + }, + "subClusters": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.SubClusterConfig" + } + }, + "timestep": { + "type": "integer" + }, + "unit": { + "$ref": "#/definitions/schema.Unit" + } + } + }, "schema.MetricScope": { "type": "string", "enum": [ @@ -1281,27 +1850,34 
@@ } } }, + "schema.MetricValue": { + "type": "object", + "properties": { + "unit": { + "$ref": "#/definitions/schema.Unit" + }, + "value": { + "type": "number" + } + } + }, "schema.Resource": { "description": "A resource used by a job", "type": "object", "properties": { "accelerators": { - "description": "List of of accelerator device ids", "type": "array", "items": { "type": "string" } }, "configuration": { - "description": "The configuration options of the node", "type": "string" }, "hostname": { - "description": "Name of the host (= node)", "type": "string" }, "hwthreads": { - "description": "List of OS processor ids", "type": "array", "items": { "type": "integer" @@ -1344,6 +1920,12 @@ "type": "number" } }, + "median": { + "type": "array", + "items": { + "type": "number" + } + }, "min": { "type": "array", "items": { @@ -1361,26 +1943,165 @@ } } }, + "schema.SubCluster": { + "type": "object", + "properties": { + "coresPerSocket": { + "type": "integer" + }, + "energyFootprint": { + "type": "array", + "items": { + "type": "string" + } + }, + "flopRateScalar": { + "$ref": "#/definitions/schema.MetricValue" + }, + "flopRateSimd": { + "$ref": "#/definitions/schema.MetricValue" + }, + "footprint": { + "type": "array", + "items": { + "type": "string" + } + }, + "memoryBandwidth": { + "$ref": "#/definitions/schema.MetricValue" + }, + "metricConfig": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.MetricConfig" + } + }, + "name": { + "type": "string" + }, + "nodes": { + "type": "string" + }, + "processorType": { + "type": "string" + }, + "socketsPerNode": { + "type": "integer" + }, + "threadsPerCore": { + "type": "integer" + }, + "topology": { + "$ref": "#/definitions/schema.Topology" + } + } + }, + "schema.SubClusterConfig": { + "type": "object", + "properties": { + "alert": { + "type": "number" + }, + "caution": { + "type": "number" + }, + "energy": { + "type": "string" + }, + "footprint": { + "type": "string" + }, + "lowerIsBetter": { + "type": 
"boolean" + }, + "name": { + "type": "string" + }, + "normal": { + "type": "number" + }, + "peak": { + "type": "number" + }, + "remove": { + "type": "boolean" + } + } + }, "schema.Tag": { "description": "Defines a tag using name and type.", "type": "object", "properties": { "id": { - "description": "The unique DB identifier of a tag\nThe unique DB identifier of a tag", "type": "integer" }, "name": { - "description": "Tag Name", "type": "string", "example": "Testjob" }, + "scope": { + "type": "string", + "example": "global" + }, "type": { - "description": "Tag Type", "type": "string", "example": "Debug" } } }, + "schema.Topology": { + "type": "object", + "properties": { + "accelerators": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Accelerator" + } + }, + "core": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "die": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "memoryDomain": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "node": { + "type": "array", + "items": { + "type": "integer" + } + }, + "socket": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + } + }, "schema.Unit": { "type": "object", "properties": { @@ -1399,10 +2120,5 @@ "name": "X-Auth-Token", "in": "header" } - }, - "tags": [ - { - "name": "Job API" - } - ] + } } \ No newline at end of file diff --git a/interfaces/rest/swagger.yaml b/interfaces/rest/swagger.yaml index 093266d..4e3c47e 100644 --- a/interfaces/rest/swagger.yaml +++ b/interfaces/rest/swagger.yaml @@ -1,11 +1,32 @@ basePath: /api definitions: + api.ApiReturnedUser: + properties: + email: + type: string + name: + type: string + projects: + items: + type: string + type: array + roles: + items: + type: string + type: array + username: + type: string + type: object api.ApiTag: properties: name: description: Tag 
Name example: Testjob type: string + scope: + description: Tag Scope for Frontend Display + example: global + type: string type: description: Tag Type example: Debug @@ -33,6 +54,15 @@ definitions: msg: type: string type: object + api.EditMetaRequest: + properties: + key: + example: jobScript + type: string + value: + example: bash script + type: string + type: object api.ErrorResponse: properties: error: @@ -42,6 +72,14 @@ definitions: description: Statustext of Errorcode type: string type: object + api.GetClustersApiResponse: + properties: + clusters: + description: Array of clusters + items: + $ref: '#/definitions/schema.Cluster' + type: array + type: object api.GetJobApiResponse: properties: data: @@ -76,74 +114,91 @@ definitions: type: object api.StartJobApiResponse: properties: - id: - description: Database ID of new job - type: integer + msg: + type: string type: object api.StopJobApiRequest: properties: cluster: - description: Cluster of job example: fritz type: string jobId: - description: Cluster Job ID of job example: 123000 type: integer jobState: allOf: - $ref: '#/definitions/schema.JobState' - description: Final job state example: completed startTime: - description: Start Time of job as epoch example: 1649723812 type: integer stopTime: - description: Stop Time of job as epoch example: 1649763839 type: integer required: - jobState - stopTime type: object + schema.Accelerator: + properties: + id: + type: string + model: + type: string + type: + type: string + type: object + schema.Cluster: + properties: + metricConfig: + items: + $ref: '#/definitions/schema.MetricConfig' + type: array + name: + type: string + subClusters: + items: + $ref: '#/definitions/schema.SubCluster' + type: array + type: object schema.Job: description: Information of a HPC job. 
properties: arrayJobId: - description: The unique identifier of an array job example: 123000 type: integer cluster: - description: The unique identifier of a cluster example: fritz type: string concurrentJobs: $ref: '#/definitions/schema.JobLinkResultList' duration: - description: Duration of job in seconds (Min > 0) example: 43200 minimum: 1 type: integer + energy: + type: number + energyFootprint: + additionalProperties: + type: number + type: object exclusive: - description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs - of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple - jobs of same user' example: 1 maximum: 2 minimum: 0 type: integer + footprint: + additionalProperties: + type: number + type: object id: - description: The unique identifier of a job in the database type: integer jobId: - description: The unique identifier of a job example: 123000 type: integer jobState: allOf: - $ref: '#/definitions/schema.JobState' - description: Final state of job enum: - completed - failed @@ -155,67 +210,50 @@ definitions: metaData: additionalProperties: type: string - description: Additional information about the job type: object monitoringStatus: - description: 'State of monitoring system during job run: 0 - Disabled, 1 - - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull' example: 1 maximum: 3 minimum: 0 type: integer numAcc: - description: Number of accelerators used (Min > 0) example: 2 minimum: 1 type: integer numHwthreads: - description: NumCores int32 `json:"numCores" db:"num_cores" - example:"20" minimum:"1"` // - Number of HWThreads used (Min > 0) example: 20 minimum: 1 type: integer numNodes: - description: Number of nodes used (Min > 0) example: 2 minimum: 1 type: integer partition: - description: The Slurm partition to which the job was submitted example: main type: string project: - description: The unique identifier of a project example: abcd200 type: string resources: - description: 
Resources used by job items: $ref: '#/definitions/schema.Resource' type: array smt: - description: SMT threads used by job example: 4 type: integer startTime: - description: Start time as 'time.Time' data type type: string subCluster: - description: The unique identifier of a sub cluster example: main type: string tags: - description: List of tags items: $ref: '#/definitions/schema.Tag' type: array user: - description: The unique identifier of a user example: abcd100h type: string walltime: - description: Requested walltime of job in seconds (Min > 0) example: 86400 minimum: 1 type: integer @@ -240,39 +278,40 @@ definitions: description: Meta data information of a HPC job. properties: arrayJobId: - description: The unique identifier of an array job example: 123000 type: integer cluster: - description: The unique identifier of a cluster example: fritz type: string concurrentJobs: $ref: '#/definitions/schema.JobLinkResultList' duration: - description: Duration of job in seconds (Min > 0) example: 43200 minimum: 1 type: integer + energy: + type: number + energyFootprint: + additionalProperties: + type: number + type: object exclusive: - description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs - of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple - jobs of same user' example: 1 maximum: 2 minimum: 0 type: integer + footprint: + additionalProperties: + type: number + type: object id: - description: The unique identifier of a job in the database type: integer jobId: - description: The unique identifier of a job example: 123000 type: integer jobState: allOf: - $ref: '#/definitions/schema.JobState' - description: Final state of job enum: - completed - failed @@ -284,74 +323,56 @@ definitions: metaData: additionalProperties: type: string - description: Additional information about the job type: object monitoringStatus: - description: 'State of monitoring system during job run: 0 - Disabled, 1 - - Running or Archiving (Default), 2 - 
Archiving Failed, 3 - Archiving Successfull' example: 1 maximum: 3 minimum: 0 type: integer numAcc: - description: Number of accelerators used (Min > 0) example: 2 minimum: 1 type: integer numHwthreads: - description: NumCores int32 `json:"numCores" db:"num_cores" - example:"20" minimum:"1"` // - Number of HWThreads used (Min > 0) example: 20 minimum: 1 type: integer numNodes: - description: Number of nodes used (Min > 0) example: 2 minimum: 1 type: integer partition: - description: The Slurm partition to which the job was submitted example: main type: string project: - description: The unique identifier of a project example: abcd200 type: string resources: - description: Resources used by job items: $ref: '#/definitions/schema.Resource' type: array smt: - description: SMT threads used by job example: 4 type: integer startTime: - description: Start epoch time stamp in seconds (Min > 0) example: 1649723812 minimum: 1 type: integer statistics: additionalProperties: $ref: '#/definitions/schema.JobStatistics' - description: Metric statistics of job type: object subCluster: - description: The unique identifier of a sub cluster example: main type: string tags: - description: List of tags items: $ref: '#/definitions/schema.Tag' type: array user: - description: The unique identifier of a user example: abcd100h type: string walltime: - description: Requested walltime of job in seconds (Min > 0) example: 86400 minimum: 1 type: integer @@ -410,6 +431,37 @@ definitions: unit: $ref: '#/definitions/schema.Unit' type: object + schema.MetricConfig: + properties: + aggregation: + type: string + alert: + type: number + caution: + type: number + energy: + type: string + footprint: + type: string + lowerIsBetter: + type: boolean + name: + type: string + normal: + type: number + peak: + type: number + scope: + $ref: '#/definitions/schema.MetricScope' + subClusters: + items: + $ref: '#/definitions/schema.SubClusterConfig' + type: array + timestep: + type: integer + unit: + $ref: 
'#/definitions/schema.Unit' + type: object schema.MetricScope: enum: - invalid_scope @@ -437,22 +489,25 @@ definitions: min: type: number type: object + schema.MetricValue: + properties: + unit: + $ref: '#/definitions/schema.Unit' + value: + type: number + type: object schema.Resource: description: A resource used by a job properties: accelerators: - description: List of of accelerator device ids items: type: string type: array configuration: - description: The configuration options of the node type: string hostname: - description: Name of the host (= node) type: string hwthreads: - description: List of OS processor ids items: type: integer type: array @@ -480,6 +535,10 @@ definitions: items: type: number type: array + median: + items: + type: number + type: array min: items: type: number @@ -491,23 +550,112 @@ definitions: type: array type: object type: object + schema.SubCluster: + properties: + coresPerSocket: + type: integer + energyFootprint: + items: + type: string + type: array + flopRateScalar: + $ref: '#/definitions/schema.MetricValue' + flopRateSimd: + $ref: '#/definitions/schema.MetricValue' + footprint: + items: + type: string + type: array + memoryBandwidth: + $ref: '#/definitions/schema.MetricValue' + metricConfig: + items: + $ref: '#/definitions/schema.MetricConfig' + type: array + name: + type: string + nodes: + type: string + processorType: + type: string + socketsPerNode: + type: integer + threadsPerCore: + type: integer + topology: + $ref: '#/definitions/schema.Topology' + type: object + schema.SubClusterConfig: + properties: + alert: + type: number + caution: + type: number + energy: + type: string + footprint: + type: string + lowerIsBetter: + type: boolean + name: + type: string + normal: + type: number + peak: + type: number + remove: + type: boolean + type: object schema.Tag: description: Defines a tag using name and type. 
properties: id: - description: |- - The unique DB identifier of a tag - The unique DB identifier of a tag type: integer name: - description: Tag Name example: Testjob type: string + scope: + example: global + type: string type: - description: Tag Type example: Debug type: string type: object + schema.Topology: + properties: + accelerators: + items: + $ref: '#/definitions/schema.Accelerator' + type: array + core: + items: + items: + type: integer + type: array + type: array + die: + items: + items: + type: integer + type: array + type: array + memoryDomain: + items: + items: + type: integer + type: array + type: array + node: + items: + type: integer + type: array + socket: + items: + items: + type: integer + type: array + type: array + type: object schema.Unit: properties: base: @@ -526,8 +674,45 @@ info: name: MIT License url: https://opensource.org/licenses/MIT title: ClusterCockpit REST API - version: "1" + version: 1.0.0 paths: + /clusters/: + get: + description: Get a list of all cluster configs. Specific cluster can be requested + using query parameter. 
+ parameters: + - description: Job Cluster + in: query + name: cluster + type: string + produces: + - application/json + responses: + "200": + description: Array of clusters + schema: + $ref: '#/definitions/api.GetClustersApiResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/api.ErrorResponse' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/api.ErrorResponse' + "403": + description: Forbidden + schema: + $ref: '#/definitions/api.ErrorResponse' + "500": + description: Internal Server Error + schema: + $ref: '#/definitions/api.ErrorResponse' + security: + - ApiKeyAuth: [] + summary: Lists all cluster configs + tags: + - Cluster query /jobs/: get: description: |- @@ -592,8 +777,59 @@ paths: - ApiKeyAuth: [] summary: Lists all jobs tags: - - query + - Job query /jobs/{id}: + get: + description: |- + Job to get is specified by database ID + Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'. 
+ parameters: + - description: Database ID of Job + in: path + name: id + required: true + type: integer + - description: Include all available metrics + in: query + name: all-metrics + type: boolean + produces: + - application/json + responses: + "200": + description: Job resource + schema: + $ref: '#/definitions/api.GetJobApiResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/api.ErrorResponse' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/api.ErrorResponse' + "403": + description: Forbidden + schema: + $ref: '#/definitions/api.ErrorResponse' + "404": + description: Resource not found + schema: + $ref: '#/definitions/api.ErrorResponse' + "422": + description: 'Unprocessable Entity: finding job failed: sql: no rows in + result set' + schema: + $ref: '#/definitions/api.ErrorResponse' + "500": + description: Internal Server Error + schema: + $ref: '#/definitions/api.ErrorResponse' + security: + - ApiKeyAuth: [] + summary: Get job meta and optional all metric data + tags: + - Job query post: consumes: - application/json @@ -648,9 +884,9 @@ paths: $ref: '#/definitions/api.ErrorResponse' security: - ApiKeyAuth: [] - summary: Get complete job meta and metric data + summary: Get job meta and configurable metric data tags: - - query + - Job query /jobs/delete_job/: delete: consumes: @@ -700,7 +936,7 @@ paths: - ApiKeyAuth: [] summary: Remove a job from the sql database tags: - - remove + - Job remove /jobs/delete_job/{id}: delete: description: Job to remove is specified by database ID. This will not remove @@ -747,7 +983,7 @@ paths: - ApiKeyAuth: [] summary: Remove a job from the sql database tags: - - remove + - Job remove /jobs/delete_job_before/{ts}: delete: description: Remove all jobs with start time before timestamp. 
The jobs will @@ -794,7 +1030,54 @@ paths: - ApiKeyAuth: [] summary: Remove a job from the sql database tags: - - remove + - Job remove + /jobs/edit_meta/{id}: + post: + consumes: + - application/json + description: |- + Edit key value pairs in job metadata json + If a key already exists its content will be overwritten + parameters: + - description: Job Database ID + in: path + name: id + required: true + type: integer + - description: Kay value pair to add + in: body + name: request + required: true + schema: + $ref: '#/definitions/api.EditMetaRequest' + produces: + - application/json + responses: + "200": + description: Updated job resource + schema: + $ref: '#/definitions/schema.Job' + "400": + description: Bad Request + schema: + $ref: '#/definitions/api.ErrorResponse' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/api.ErrorResponse' + "404": + description: Job does not exist + schema: + $ref: '#/definitions/api.ErrorResponse' + "500": + description: Internal Server Error + schema: + $ref: '#/definitions/api.ErrorResponse' + security: + - ApiKeyAuth: [] + summary: Edit meta-data json + tags: + - Job add and modify /jobs/start_job/: post: consumes: @@ -841,7 +1124,7 @@ paths: - ApiKeyAuth: [] summary: Adds a new job as "running" tags: - - add and modify + - Job add and modify /jobs/stop_job/: post: description: |- @@ -890,69 +1173,14 @@ paths: - ApiKeyAuth: [] summary: Marks job as completed and triggers archiving tags: - - add and modify - /jobs/stop_job/{id}: - post: - consumes: - - application/json - description: |- - Job to stop is specified by database ID. Only stopTime and final state are required in request body. - Returns full job resource information according to 'JobMeta' scheme. 
- parameters: - - description: Database ID of Job - in: path - name: id - required: true - type: integer - - description: stopTime and final state in request body - in: body - name: request - required: true - schema: - $ref: '#/definitions/api.StopJobApiRequest' - produces: - - application/json - responses: - "200": - description: Job resource - schema: - $ref: '#/definitions/schema.JobMeta' - "400": - description: Bad Request - schema: - $ref: '#/definitions/api.ErrorResponse' - "401": - description: Unauthorized - schema: - $ref: '#/definitions/api.ErrorResponse' - "403": - description: Forbidden - schema: - $ref: '#/definitions/api.ErrorResponse' - "404": - description: Resource not found - schema: - $ref: '#/definitions/api.ErrorResponse' - "422": - description: 'Unprocessable Entity: finding job failed: sql: no rows in - result set' - schema: - $ref: '#/definitions/api.ErrorResponse' - "500": - description: Internal Server Error - schema: - $ref: '#/definitions/api.ErrorResponse' - security: - - ApiKeyAuth: [] - summary: Marks job as completed and triggers archiving - tags: - - add and modify + - Job add and modify /jobs/tag_job/{id}: post: consumes: - application/json description: |- Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely. + Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username. If tagged job is already finished: Tag will be written directly to respective archive files. parameters: - description: Job Database ID @@ -995,12 +1223,241 @@ paths: - ApiKeyAuth: [] summary: Adds one or more tags to a job tags: - - add and modify + - Job add and modify + /user/{id}: + post: + consumes: + - multipart/form-data + description: |- + Modifies user defined by username (id) in one of four possible ways. + If more than one formValue is set then only the highest priority field is used. + Only accessible from IPs registered with apiAllowedIPs configuration option. 
+ parameters: + - description: Database ID of User + in: path + name: id + required: true + type: string + - description: 'Priority 1: Role to add' + enum: + - admin + - support + - manager + - user + - api + in: formData + name: add-role + type: string + - description: 'Priority 2: Role to remove' + enum: + - admin + - support + - manager + - user + - api + in: formData + name: remove-role + type: string + - description: 'Priority 3: Project to add' + in: formData + name: add-project + type: string + - description: 'Priority 4: Project to remove' + in: formData + name: remove-project + type: string + produces: + - text/plain + responses: + "200": + description: Success Response Message + schema: + type: string + "400": + description: Bad Request + schema: + type: string + "401": + description: Unauthorized + schema: + type: string + "403": + description: Forbidden + schema: + type: string + "422": + description: 'Unprocessable Entity: The user could not be updated' + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Updates an existing user + tags: + - User + /users/: + delete: + consumes: + - multipart/form-data + description: |- + User defined by username in form data will be deleted from database. + Only accessible from IPs registered with apiAllowedIPs configuration option. 
+ parameters: + - description: User ID to delete + in: formData + name: username + required: true + type: string + produces: + - text/plain + responses: + "200": + description: User deleted successfully + "400": + description: Bad Request + schema: + type: string + "401": + description: Unauthorized + schema: + type: string + "403": + description: Forbidden + schema: + type: string + "422": + description: 'Unprocessable Entity: deleting user failed' + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Deletes a user + tags: + - User + get: + description: |- + Returns a JSON-encoded list of users. + Required query-parameter defines if all users or only users with additional special roles are returned. + Only accessible from IPs registered with apiAllowedIPs configuration option. + parameters: + - description: If returned list should contain all users or only users with + additional special roles + in: query + name: not-just-user + required: true + type: boolean + produces: + - application/json + responses: + "200": + description: List of users returned successfully + schema: + items: + $ref: '#/definitions/api.ApiReturnedUser' + type: array + "400": + description: Bad Request + schema: + type: string + "401": + description: Unauthorized + schema: + type: string + "403": + description: Forbidden + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Returns a list of users + tags: + - User + post: + consumes: + - multipart/form-data + description: |- + User specified in form data will be saved to database. + Only accessible from IPs registered with apiAllowedIPs configuration option. 
+ parameters: + - description: Unique user ID + in: formData + name: username + required: true + type: string + - description: User password + in: formData + name: password + required: true + type: string + - description: User role + enum: + - admin + - support + - manager + - user + - api + in: formData + name: role + required: true + type: string + - description: Managed project, required for new manager role user + in: formData + name: project + type: string + - description: Users name + in: formData + name: name + type: string + - description: Users email + in: formData + name: email + type: string + produces: + - text/plain + responses: + "200": + description: Success Response + schema: + type: string + "400": + description: Bad Request + schema: + type: string + "401": + description: Unauthorized + schema: + type: string + "403": + description: Forbidden + schema: + type: string + "422": + description: 'Unprocessable Entity: creating user failed' + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Adds a new user + tags: + - User securityDefinitions: ApiKeyAuth: in: header name: X-Auth-Token type: apiKey swagger: "2.0" -tags: -- name: Job API diff --git a/schemas/README.md b/schemas/README.md deleted file mode 100644 index 09fdd20..0000000 --- a/schemas/README.md +++ /dev/null @@ -1,4 +0,0 @@ -## SQL Database Schema for Job Table - -This sqlite schema for a HPC job table is used in cc-backend and also part of -the ClusterCockpit Job Archive specification. 
diff --git a/schemas/jobs-sqlite.sql b/schemas/jobs-sqlite.sql deleted file mode 100644 index ab37924..0000000 --- a/schemas/jobs-sqlite.sql +++ /dev/null @@ -1,71 +0,0 @@ -CREATE TABLE tag ( -id INTEGER PRIMARY KEY, -tag_type VARCHAR(255) NOT NULL, -tag_name VARCHAR(255) NOT NULL, -insert_ts TEXT DEFAULT CURRENT_TIMESTAMP, -UNIQUE (tag_type, tag_name)); - -CREATE TABLE jobtag ( -job_id INTEGER, -tag_id INTEGER, -insert_ts TEXT DEFAULT CURRENT_TIMESTAMP, -PRIMARY KEY (job_id, tag_id), -FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE, -FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE); - -CREATE TABLE user ( -username varchar(255) PRIMARY KEY NOT NULL, -password varchar(255) DEFAULT NULL, -ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */ -name varchar(255) DEFAULT NULL, -roles varchar(255) NOT NULL DEFAULT "[]", -email varchar(255) DEFAULT NULL, -projects varchar(255) NOT NULL DEFAULT "[]"); - -CREATE TABLE configuration ( -username varchar(255), -confkey varchar(255), -value varchar(255), -PRIMARY KEY (username, confkey), -FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION); - -CREATE TABLE job ( -id INTEGER PRIMARY KEY, -job_id BIGINT NOT NULL, -cluster VARCHAR(255) NOT NULL, -subcluster VARCHAR(255) NOT NULL, -start_time BIGINT NOT NULL, -- Unix timestamp -user VARCHAR(255) NOT NULL, -project VARCHAR(255) NOT NULL, -partition VARCHAR(255), -array_job_id BIGINT, -duration INT NOT NULL, -walltime INT NOT NULL, -job_state VARCHAR(255) NOT NULL -CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')), -meta_data TEXT, -- JSON -resources TEXT NOT NULL, -- JSON -num_nodes INT NOT NULL, -num_hwthreads INT, -num_acc INT, -smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )), -exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)), -monitoring_status TINYINT NOT NULL DEFAULT 1 
CHECK(monitoring_status IN (0, 1, 2, 3)), -mem_used_max REAL NOT NULL DEFAULT 0.0, -flops_any_avg REAL NOT NULL DEFAULT 0.0, -mem_bw_avg REAL NOT NULL DEFAULT 0.0, -load_avg REAL NOT NULL DEFAULT 0.0, -net_bw_avg REAL NOT NULL DEFAULT 0.0, -net_data_vol_total REAL NOT NULL DEFAULT 0.0, -file_bw_avg REAL NOT NULL DEFAULT 0.0, -file_data_vol_total REAL NOT NULL DEFAULT 0.0, -UNIQUE (job_id, cluster, start_time)); - -CREATE INDEX job_stats ON job (cluster,subcluster,user); -CREATE INDEX job_by_user ON job (user); -CREATE INDEX job_by_starttime ON job (start_time); -CREATE INDEX job_by_job_id ON job (job_id, cluster, start_time); -CREATE INDEX job_list ON job (cluster, job_state); -CREATE INDEX job_list_user ON job (user, cluster, job_state); -CREATE INDEX job_list_users ON job (user, job_state); -CREATE INDEX job_list_users_start ON job (start_time, user, job_state);