mirror of
				https://github.com/ClusterCockpit/cc-metric-store.git
				synced 2025-10-31 09:05:06 +01:00 
			
		
		
		
	Update todos. Cleanup
This commit is contained in:
		
							
								
								
									
										49
									
								
								TODO.md
									
									
									
									
									
								
							
							
						
						
									
										49
									
								
								TODO.md
									
									
									
									
									
								
							| @@ -1,33 +1,40 @@ | ||||
| # Possible Tasks and Improvements | ||||
|  | ||||
| Importance: | ||||
|  | ||||
| - **I** Important | ||||
| - **N** Nice to have | ||||
| - **W** Won't do. Probably not necessary. | ||||
|  | ||||
| - Benchmarking | ||||
|   - Benchmark and compare common timeseries DBs with our data and our queries | ||||
|   - Benchmark and compare common timeseries DBs with our data and our queries (N) | ||||
| - Memory management | ||||
|   - To overcome garbage collection overhead: Reimplement in Rust | ||||
|   - Request memory directly batchwise via mmap (started in branch) | ||||
|   - To overcome garbage collection overhead: Reimplement in Rust (N) | ||||
|   - Request memory directly batchwise via mmap (started in branch) (W) | ||||
| - Archive | ||||
|   - S3 backend for archive | ||||
|   - Store information in each buffer if already archived | ||||
|   - Do not create new checkpoint if all buffers already archived | ||||
|   - S3 backend for archive (I) | ||||
|   - Store information in each buffer if already archived (N) | ||||
|   - Do not create new checkpoint if all buffers already archived (N) | ||||
| - Checkpoints | ||||
|   - S3 backend for checkpoints | ||||
|   - Combine checkpoints into larger files | ||||
|   - Binary checkpoints (started in branch) | ||||
|   - S3 backend for checkpoints (I) | ||||
|   - Combine checkpoints into larger files (I) | ||||
|   - Binary checkpoints (started in branch) (W) | ||||
| - API | ||||
|   - Redesign query interface | ||||
|   - Introduce JWT authentication for REST and NATS | ||||
|   - Redesign query interface (N) | ||||
|   - Introduce JWT authentication for REST and NATS (I) | ||||
| - Testing | ||||
|   - General tests | ||||
|   - Check for corner cases that should fail gracefully | ||||
|   - Write more realistic `ToArchive`/`FromArchive` tests | ||||
|   - General tests (I) | ||||
|   - Test data generator for regression tests (I) | ||||
|   - Check for corner cases that should fail gracefully (N) | ||||
|   - Write more realistic `ToArchive`/`FromArchive` tests (N) | ||||
| - Aggregation | ||||
|   - Calculate averages buffer-wise as soon as full, average weighted by length of buffer | ||||
|   - Only the head-buffer needs to be fully traversed | ||||
|   - Calculate averages buffer-wise as soon as full, average weighted by length of buffer (N) | ||||
|   - Only the head-buffer needs to be fully traversed (N) | ||||
|   - If aggregating over hwthreads/cores/sockets cache those results and reuse | ||||
|     some of that for new queries aggregating only over the newer data | ||||
|     some of that for new queries aggregating only over the newer data (W) | ||||
| - Compression | ||||
|   - Enable compression for http API requests | ||||
|   - Enable compression for checkpoints/archive | ||||
|   - Enable compression for http API requests (N) | ||||
|   - Enable compression for checkpoints/archive (I) | ||||
| - Sampling | ||||
|   - Support data resampling to reduce data points | ||||
|   - Support resampling algorithms that preserve min/max as far as possible | ||||
|   - Support data resampling to reduce data points (I) | ||||
|   - Use resampling algorithms that preserve min/max as far as possible (I) | ||||
|   | ||||
							
								
								
									
										148
									
								
								api/openapi.yaml
									
									
									
									
									
								
							
							
						
						
									
										148
									
								
								api/openapi.yaml
									
									
									
									
									
								
							| @@ -1,148 +0,0 @@ | ||||
| # OpenAPI spec describing a subset of the HTTP REST API for the cc-metric-store. | ||||
|  | ||||
| openapi: 3.0.3 | ||||
| info: | ||||
|   title: 'cc-metric-store REST API' | ||||
|   description: 'In-memory time series database for hpc metrics to be used with the [ClusterCockpit](https://github.com/ClusterCockpit) toolsuite' | ||||
|   version: 0.1.0 | ||||
| paths: | ||||
|   '/api/write': | ||||
|     post: | ||||
|       operationId: 'writeMetrics' | ||||
|       description: 'Receives metrics in the influx line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)' | ||||
|       parameters: | ||||
|         - name: cluster | ||||
|           in: query | ||||
|           schema: { type: string } | ||||
|           description: "If the lines in the body do not have a cluster tag, use this value instead." | ||||
|       requestBody: | ||||
|         required: true | ||||
|         content: | ||||
|           'text/plain': | ||||
|             example: | ||||
|               'flops_any,cluster=emmy,hostname=e1001,type=cpu,type-id=0 value=42.0' | ||||
|       responses: | ||||
|         200: | ||||
|           description: 'Everything went fine' | ||||
|         400: | ||||
|           description: 'Bad Request' | ||||
|   '/api/query': | ||||
|     post: | ||||
|       operationId: 'queryMetrics' | ||||
|       description: 'Query metrics' | ||||
|       requestBody: | ||||
|         required: true | ||||
|         content: | ||||
|           'application/json': | ||||
|             schema: | ||||
|               type: object | ||||
|               required: [cluster, from, to] | ||||
|               properties: | ||||
|                 cluster: | ||||
|                   type: string | ||||
|                 from: | ||||
|                   type: integer | ||||
|                 to: | ||||
|                   type: integer | ||||
|                 with-stats: | ||||
|                   type: boolean | ||||
|                   default: true | ||||
|                 with-data: | ||||
|                   type: boolean | ||||
|                   default: true | ||||
|                 queries: | ||||
|                   type: array | ||||
|                   items: | ||||
|                     $ref: '#/components/schemas/ApiQuery' | ||||
|                 for-all-nodes: | ||||
|                   description: 'If not null, add a new query for every known host on that cluster and every metric (at node-scope) specified in this array to the request. This can be used to get a metric for every host in a cluster without knowing the name of every host.' | ||||
|                   type: array | ||||
|                   items: | ||||
|                     type: string | ||||
|       responses: | ||||
|         200: | ||||
|           description: 'Requested data and stats as JSON' | ||||
|           content: | ||||
|             'application/json': | ||||
|               schema: | ||||
|                 type: object | ||||
|                 properties: | ||||
|                   queries: | ||||
|                     description: 'Only if for-all-nodes was used, this property exists.' | ||||
|                   results: | ||||
|                     type: array | ||||
|                     description: 'Array where each element is a response to the query at that same index in the request' | ||||
|                     items: | ||||
|                       description: 'If `aggreg` is true, only ever has one element.' | ||||
|                       type: array | ||||
|                       items: | ||||
|                         type: object | ||||
|                         properties: | ||||
|                           error: | ||||
|                             description: 'If not null or undefined, an error happened while processing that query' | ||||
|                             type: string | ||||
|                             nullable: true | ||||
|                           data: | ||||
|                             type: array | ||||
|                             items: | ||||
|                               type: number | ||||
|                               nullable: true | ||||
|                           avg: { type: number } | ||||
|                           min: { type: number } | ||||
|                           max: { type: number } | ||||
|         400: | ||||
|           description: 'Bad Request' | ||||
|   '/api/free': | ||||
|     post: | ||||
|       operationId: 'freeBuffers' | ||||
|       description: 'Allow all buffers containing only data older than `to` to be freed' | ||||
|       parameters: | ||||
|         - name: to | ||||
|           in: query | ||||
|           description: 'Unix Timestamp' | ||||
|           required: true | ||||
|           schema: | ||||
|             type: integer | ||||
|       requestBody: | ||||
|         required: true | ||||
|         content: | ||||
|           'application/json': | ||||
|             schema: | ||||
|               type: array | ||||
|               items: | ||||
|                 type: array | ||||
|                 items: | ||||
|                   type: string | ||||
|       responses: | ||||
|         200: | ||||
|           description: 'Everything went fine' | ||||
|         400: | ||||
|           description: 'Bad Request' | ||||
| components: | ||||
|   schemas: | ||||
|     ApiQuery: | ||||
|       description: 'A single query for a specific metric resulting in one series' | ||||
|       type: object | ||||
|       required: [metric, hostname, aggreg] | ||||
|       properties: | ||||
|         metric: | ||||
|           type: string | ||||
|         hostname: | ||||
|           type: string | ||||
|         type: | ||||
|           description: 'Not required for node-level requests. Usually something like socket, cpu or hwthread.' | ||||
|           type: string | ||||
|         type-ids: | ||||
|           type: array | ||||
|           items: | ||||
|             type: string | ||||
|         aggreg: | ||||
|           type: boolean | ||||
|           description: 'If true, every query result will have exactly one element. Otherwise, the data for every requested type-id/sub-type-id is provided separately' | ||||
|   securitySchemes: | ||||
|     bearerAuth: | ||||
|       type: http | ||||
|       scheme: bearer | ||||
|       bearerFormat: JWT | ||||
| security: | ||||
|   - bearerAuth: [] # Applies `bearerAuth` globally | ||||
		Reference in New Issue
	
	Block a user