mirror of
https://github.com/ClusterCockpit/cc-metric-store.git
synced 2025-01-13 07:59:05 +01:00
commit
2bf4ec1744
53
TODO.md
53
TODO.md
@ -1,15 +1,40 @@
|
|||||||
# TODOs
|
# Possible Tasks and Improvements
|
||||||
|
|
||||||
- Improve checkpoints/archives
|
Importance:
|
||||||
- Store information in each buffer if already archived
|
|
||||||
- Do not create new checkpoint if all buffers already archived
|
- **I** Important
|
||||||
- Missing Testcases:
|
- **N** Nice to have
|
||||||
- General tests
|
- **W** Won't do. Probably not necessary.
|
||||||
- Check for corner cases that should fail gracefully
|
|
||||||
- Write more realistic `ToArchive`/`FromArchive` tests
|
- Benchmarking
|
||||||
- Optimization: Once a buffer is full, calculate min, max and avg
|
- Benchmark and compare common timeseries DBs with our data and our queries (N)
|
||||||
- Calculate averages buffer-wise, average weighted by length of buffer
|
- Memory management
|
||||||
- Only the head-buffer needs to be fully traversed
|
- To overcome garbage collection overhead: Reimplement in Rust (N)
|
||||||
- Optimization: If aggregating over hwthreads/cores/sockets cache those results
|
- Request memory directly batchwise via mmap (started in branch) (W)
|
||||||
and reuse some of that for new queries aggregating only over the newer data
|
- Archive
|
||||||
- ...
|
- S3 backend for archive (I)
|
||||||
|
- Store information in each buffer if already archived (N)
|
||||||
|
- Do not create new checkpoint if all buffers already archived (N)
|
||||||
|
- Checkpoints
|
||||||
|
- S3 backend for checkpoints (I)
|
||||||
|
- Combine checkpoints into larger files (I)
|
||||||
|
- Binary checkpoints (started in branch) (W)
|
||||||
|
- API
|
||||||
|
- Redesign query interface (N)
|
||||||
|
- Introduce JWT authentication for REST and NATS (I)
|
||||||
|
- Testing
|
||||||
|
- General tests (I)
|
||||||
|
- Test data generator for regression tests (I)
|
||||||
|
- Check for corner cases that should fail gracefully (N)
|
||||||
|
- Write more realistic `ToArchive`/`FromArchive` tests (N)
|
||||||
|
- Aggregation
|
||||||
|
- Calculate averages buffer-wise as soon as full, average weighted by length of buffer (N)
|
||||||
|
- Only the head-buffer needs to be fully traversed (N)
|
||||||
|
- If aggregating over hwthreads/cores/sockets cache those results and reuse
|
||||||
|
some of that for new queries aggregating only over the newer data (W)
|
||||||
|
- Compression
|
||||||
|
- Enable compression for http API requests (N)
|
||||||
|
- Enable compression for checkpoints/archive (I)
|
||||||
|
- Sampling
|
||||||
|
- Support data resampling to reduce data points (I)
|
||||||
|
- Use resampling algorithms that preserve min/max as far as possible (I)
|
||||||
|
148
api/openapi.yaml
148
api/openapi.yaml
@ -1,148 +0,0 @@
|
|||||||
# OpenAPI spec describing a subset of the HTTP REST API for the cc-metric-store.
|
|
||||||
|
|
||||||
openapi: 3.0.3
|
|
||||||
info:
|
|
||||||
title: 'cc-metric-store REST API'
|
|
||||||
description: 'In-memory time series database for hpc metrics to be used with the [ClusterCockpit](https://github.com/ClusterCockpit) toolsuite'
|
|
||||||
version: 0.1.0
|
|
||||||
paths:
|
|
||||||
'/api/write':
|
|
||||||
post:
|
|
||||||
operationId: 'writeMetrics'
|
|
||||||
description: 'Receives metrics in the influx line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)'
|
|
||||||
parameters:
|
|
||||||
- name: cluster
|
|
||||||
in: query
|
|
||||||
schema: { type: string }
|
|
||||||
description: "If the lines in the body do not have a cluster tag, use this value instead."
|
|
||||||
requestBody:
|
|
||||||
required: true
|
|
||||||
content:
|
|
||||||
'text/plain':
|
|
||||||
example:
|
|
||||||
'flops_any,cluster=emmy,hostname=e1001,type=cpu,type-id=0 value=42.0'
|
|
||||||
responses:
|
|
||||||
200:
|
|
||||||
description: 'Everything went fine'
|
|
||||||
400:
|
|
||||||
description: 'Bad Request'
|
|
||||||
'/api/query':
|
|
||||||
post:
|
|
||||||
operationId: 'queryMetrics'
|
|
||||||
description: 'Query metrics'
|
|
||||||
requestBody:
|
|
||||||
required: true
|
|
||||||
content:
|
|
||||||
'application/json':
|
|
||||||
schema:
|
|
||||||
type: object
|
|
||||||
required: [cluster, from, to]
|
|
||||||
properties:
|
|
||||||
cluster:
|
|
||||||
type: string
|
|
||||||
from:
|
|
||||||
type: integer
|
|
||||||
to:
|
|
||||||
type: integer
|
|
||||||
with-stats:
|
|
||||||
type: boolean
|
|
||||||
default: true
|
|
||||||
with-data:
|
|
||||||
type: boolean
|
|
||||||
default: true
|
|
||||||
queries:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/ApiQuery'
|
|
||||||
for-all-nodes:
|
|
||||||
description: 'If not null, add a new query for every known host on that cluster and every metric (at node-scope) specified in this array to the request. This can be used to get a metric for every host in a cluster without knowing the name of every host.'
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
responses:
|
|
||||||
200:
|
|
||||||
description: 'Requested data and stats as JSON'
|
|
||||||
content:
|
|
||||||
'application/json':
|
|
||||||
schema:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
queries:
|
|
||||||
description: 'Only if for-all-nodes was used, this property exists.'
|
|
||||||
results:
|
|
||||||
type: array
|
|
||||||
description: 'Array where each element is a response to the query at that same index in the request'
|
|
||||||
items:
|
|
||||||
description: 'If `aggreg` is true, only ever has one element.'
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
error:
|
|
||||||
description: 'If not null or undefined, an error happened while processing that query'
|
|
||||||
type: string
|
|
||||||
nullable: true
|
|
||||||
data:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: number
|
|
||||||
nullable: true
|
|
||||||
avg: { type: number }
|
|
||||||
min: { type: number }
|
|
||||||
max: { type: number }
|
|
||||||
400:
|
|
||||||
description: 'Bad Request'
|
|
||||||
'/api/free':
|
|
||||||
post:
|
|
||||||
operationId: 'freeBuffers'
|
|
||||||
description: 'Free all buffers containing only data older than `to`'
|
|
||||||
parameters:
|
|
||||||
- name: to
|
|
||||||
in: query
|
|
||||||
description: 'Unix Timestamp'
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: integer
|
|
||||||
requestBody:
|
|
||||||
required: true
|
|
||||||
content:
|
|
||||||
'application/json':
|
|
||||||
schema:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
responses:
|
|
||||||
200:
|
|
||||||
description: 'Everything went fine'
|
|
||||||
400:
|
|
||||||
description: 'Bad Request'
|
|
||||||
components:
|
|
||||||
schemas:
|
|
||||||
ApiQuery:
|
|
||||||
description: 'A single query for a specific metric resulting in one series'
|
|
||||||
type: object
|
|
||||||
required: [metric, hostname, aggreg]
|
|
||||||
properties:
|
|
||||||
metric:
|
|
||||||
type: string
|
|
||||||
hostname:
|
|
||||||
type: string
|
|
||||||
type:
|
|
||||||
description: 'Not required for node-level requests. Usually something like socket, cpu or hwthread.'
|
|
||||||
type: string
|
|
||||||
type-ids:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
aggreg:
|
|
||||||
type: boolean
|
|
||||||
description: 'If true, every query result will have exactly one element. Otherwise, the data for every requested type-id/sub-type-id is provided separately'
|
|
||||||
securitySchemes:
|
|
||||||
bearerAuth:
|
|
||||||
type: http
|
|
||||||
scheme: bearer
|
|
||||||
bearerFormat: JWT
|
|
||||||
security:
|
|
||||||
- bearerAuth: [] # Applies `bearerAuth` globally
|
|
Loading…
Reference in New Issue
Block a user