mirror of
https://github.com/ClusterCockpit/cc-metric-store.git
synced 2024-12-26 00:49:05 +01:00
commit
2bf4ec1744
53
TODO.md
53
TODO.md
@ -1,15 +1,40 @@
|
||||
# TODOs
|
||||
# Possible Tasks and Improvements
|
||||
|
||||
- Improve checkpoints/archives
|
||||
- Store information in each buffer if already archived
|
||||
- Do not create new checkpoint if all buffers already archived
|
||||
- Missing Testcases:
|
||||
- General tests
|
||||
- Check for corner cases that should fail gracefully
|
||||
- Write more realistic `ToArchive`/`FromArchive` tests
|
||||
- Optimization: Once a buffer is full, calculate min, max and avg
|
||||
- Calculate averages buffer-wise, average weighted by length of buffer
|
||||
- Only the head-buffer needs to be fully traversed
|
||||
- Optimization: If aggregating over hwthreads/cores/sockets cache those results
|
||||
and reuse some of that for new queries aggregating only over the newer data
|
||||
- ...
|
||||
Importance:
|
||||
|
||||
- **I** Important
|
||||
- **N** Nice to have
|
||||
- **W** Won't do. Probably not necessary.
|
||||
|
||||
- Benchmarking
|
||||
- Benchmark and compare common timeseries DBs with our data and our queries (N)
|
||||
- Memory management
|
||||
- To overcome garbage collection overhead: Reimplement in Rust (N)
|
||||
- Request memory directly batchwise via mmap (started in branch) (W)
|
||||
- Archive
|
||||
- S3 backend for archive (I)
|
||||
- Store information in each buffer if already archived (N)
|
||||
- Do not create new checkpoint if all buffers already archived (N)
|
||||
- Checkpoints
|
||||
- S3 backend for checkpoints (I)
|
||||
- Combine checkpoints into larger files (I)
|
||||
- Binary checkpoints (started in branch) (W)
|
||||
- API
|
||||
- Redesign query interface (N)
|
||||
- Introduce JWT authentication for REST and NATS (I)
|
||||
- Testing
|
||||
- General tests (I)
|
||||
- Test data generator for regression tests (I)
|
||||
- Check for corner cases that should fail gracefully (N)
|
||||
- Write more realistic `ToArchive`/`FromArchive` tests (N)
|
||||
- Aggregation
|
||||
- Calculate averages buffer-wise as soon as full, average weighted by length of buffer (N)
|
||||
- Only the head-buffer needs to be fully traversed (N)
|
||||
- If aggregating over hwthreads/cores/sockets cache those results and reuse
|
||||
some of that for new queries aggregating only over the newer data (W)
|
||||
- Compression
|
||||
- Enable compression for http API requests (N)
|
||||
- Enable compression for checkpoints/archive (I)
|
||||
- Sampling
|
||||
- Support data resampling to reduce data points (I)
|
||||
- Use resampling algorithms that preserve min/max as far as possible (I)
|
||||
|
148
api/openapi.yaml
148
api/openapi.yaml
@ -1,148 +0,0 @@
|
||||
# OpenAPI spec describing a subset of the HTTP REST API for the cc-metric-store.
|
||||
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: 'cc-metric-store REST API'
|
||||
description: 'In-memory time series database for HPC metrics to be used with the [ClusterCockpit](https://github.com/ClusterCockpit) tool suite'
|
||||
version: 0.1.0
|
||||
paths:
|
||||
'/api/write':
|
||||
post:
|
||||
operationId: 'writeMetrics'
|
||||
description: 'Receives metrics in the influx line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)'
|
||||
parameters:
|
||||
- name: cluster
|
||||
in: query
|
||||
schema: { type: string }
|
||||
description: "If the lines in the body do not have a cluster tag, use this value instead."
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'text/plain':
|
||||
example:
|
||||
'flops_any,cluster=emmy,hostname=e1001,type=cpu,type-id=0 value=42.0'
|
||||
responses:
|
||||
200:
|
||||
description: 'Everything went fine'
|
||||
400:
|
||||
description: 'Bad Request'
|
||||
'/api/query':
|
||||
post:
|
||||
operationId: 'queryMetrics'
|
||||
description: 'Query metrics'
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
required: [cluster, from, to]
|
||||
properties:
|
||||
cluster:
|
||||
type: string
|
||||
from:
|
||||
type: integer
|
||||
to:
|
||||
type: integer
|
||||
with-stats:
|
||||
type: boolean
|
||||
default: true
|
||||
with-data:
|
||||
type: boolean
|
||||
default: true
|
||||
queries:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ApiQuery'
|
||||
for-all-nodes:
|
||||
description: 'If not null, add a new query for every known host on that cluster and every metric (at node-scope) specified in this array to the request. This can be used to get a metric for every host in a cluster without knowing the name of every host.'
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
responses:
|
||||
200:
|
||||
description: 'Requested data and stats as JSON'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
queries:
|
||||
description: 'Only if for-all-nodes was used, this property exists.'
|
||||
results:
|
||||
type: array
|
||||
description: 'Array where each element is a response to the query at that same index in the request'
|
||||
items:
|
||||
description: 'If `aggreg` is true, only ever has one element.'
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
error:
|
||||
description: 'If not null or undefined, an error happened while processing that query'
|
||||
type: string
|
||||
nullable: true
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: number
|
||||
nullable: true
|
||||
avg: { type: number }
|
||||
min: { type: number }
|
||||
max: { type: number }
|
||||
400:
|
||||
description: 'Bad Request'
|
||||
'/api/free':
|
||||
post:
|
||||
operationId: 'freeBuffers'
|
||||
description: 'Free all buffers containing only data older than `to`'
|
||||
parameters:
|
||||
- name: to
|
||||
in: query
|
||||
description: 'Unix Timestamp'
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
responses:
|
||||
200:
|
||||
description: 'Everything went fine'
|
||||
400:
|
||||
description: 'Bad Request'
|
||||
components:
|
||||
schemas:
|
||||
ApiQuery:
|
||||
description: 'A single query for a specific metric resulting in one series'
|
||||
type: object
|
||||
required: [metric, hostname, aggreg]
|
||||
properties:
|
||||
metric:
|
||||
type: string
|
||||
hostname:
|
||||
type: string
|
||||
type:
|
||||
description: 'Not required for node-level requests. Usually something like socket, cpu or hwthread.'
|
||||
type: string
|
||||
type-ids:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
aggreg:
|
||||
type: boolean
|
||||
description: 'If true, every query result will have exactly one element. Otherwise, the data for every requested type-id/sub-type-id is provided separately'
|
||||
securitySchemes:
|
||||
bearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
security:
|
||||
- bearerAuth: [] # Applies `bearerAuth` globally
|
Loading…
Reference in New Issue
Block a user