mirror of
https://github.com/ClusterCockpit/cc-specifications.git
synced 2024-12-27 05:49:05 +01:00
aa5a3daff3
Add line protocol description with supported metrics. Adapt sqlite job schema.
239 lines
10 KiB
JSON
239 lines
10 KiB
JSON
{
|
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
"title": "Job meta data",
|
|
"description": "Meta data information of a HPC job",
|
|
"type": "object",
|
|
"properties": {
|
|
"job_id": {
|
|
"description": "The unique identifier of a job",
|
|
"type": "string"
|
|
},
|
|
"user_id": {
|
|
"description": "The unique identifier of a user",
|
|
"type": "string"
|
|
},
|
|
"project_id": {
|
|
"description": "The unique identifier of a project",
|
|
"type": "string"
|
|
},
|
|
"cluster_id": {
|
|
"description": "The unique identifier of a cluster",
|
|
"type": "string"
|
|
},
|
|
"num_nodes": {
|
|
"description": "Number of nodes used",
|
|
"type": "integer",
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"exclusive": {
|
|
"description": "Job uses only exclusive nodes",
|
|
"type": "boolean"
|
|
},
|
|
"walltime": {
|
|
"description": "Requested walltime of job in seconds",
|
|
"type": "integer",
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"job_state": {
|
|
"description": "Final state of job",
|
|
"type": "string",
|
|
"enum": [
|
|
"completed",
|
|
"failed",
|
|
"canceled",
|
|
"timeout"
|
|
]
|
|
},
|
|
"start_time": {
|
|
"description": "Start epoch time stamp in seconds",
|
|
"type": "integer",
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"stop_time": {
|
|
"description": "Stop epoch time stamp in seconds",
|
|
"type": "integer",
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"duration": {
|
|
"description": "Duration of job in seconds",
|
|
"type": "integer",
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"nodes": {
|
|
"description": "List of nodes",
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"minItems": 1,
|
|
"uniqueItems": true
|
|
},
|
|
"tags": {
|
|
"description": "List of tags",
|
|
"type": "array",
|
|
"items": {
|
|
"type": "object",
|
|
"properties": {
|
|
"name": {
|
|
"type": "string"
|
|
},
|
|
"type": {
|
|
"type": "string"
|
|
}
|
|
},
|
|
"required": [
|
|
"name",
|
|
"type"
|
|
]
|
|
},
|
|
"uniqueItems": true
|
|
},
|
|
"statistics": {
|
|
"description": "Job statistic data",
|
|
"type": "object",
|
|
"properties": {
|
|
"mem_used": {
|
|
"description": "Memory capacity used (required)",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"flops_any": {
|
|
"description": "Total flop rate with DP flops scaled up (required)",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"mem_bw": {
|
|
"description": "Main memory bandwidth (required)",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"net_bw": {
|
|
"description": "Total fast interconnect network bandwidth (required)",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"file_bw": {
|
|
"description": "Total file IO bandwidth (required)",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"ipc": {
|
|
"description": "Instructions executed per cycle",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"cpu_used": {
|
|
"description": "CPU core utilization",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"flops_dp": {
|
|
"description": "Double precision flop rate",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"flops_sp": {
|
|
"description": "Single precision flops rate",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"rapl_power": {
|
|
"description": "CPU power consumption",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"gpu_used": {
|
|
"description": "GPU utilization",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"gpu_mem_used": {
|
|
"description": "GPU memory capacity used",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"gpu_power": {
|
|
"description": "GPU power consumption",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"clock": {
|
|
"description": "Average core frequency",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"eth_read_bw": {
|
|
"description": "Ethernet read bandwidth",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"eth_write_bw": {
|
|
"description": "Ethernet write bandwidth",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_read_bw": {
|
|
"description": "Lustre read bandwidth",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_write_bw": {
|
|
"description": "Lustre write bandwidth",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_read_req": {
|
|
"description": "Lustre read requests",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_write_req": {
|
|
"description": "Lustre write requests",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_inodes": {
|
|
"description": "Lustre inodes used",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_accesses": {
|
|
"description": "Lustre open and close",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_fsync": {
|
|
"description": "Lustre fsync",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_create": {
|
|
"description": "Lustre create",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_open": {
|
|
"description": "Lustre open",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_close": {
|
|
"description": "Lustre close",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"lustre_seek": {
|
|
"description": "Lustre seek",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"ib_read_bw": {
|
|
"description": "Infiniband read bandwidth",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"ib_write_bw": {
|
|
"description": "Infiniband write bandwidth",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
},
|
|
"ib_congestion": {
|
|
"description": "Infiniband congestion",
|
|
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
|
|
}
|
|
},
|
|
"required": [
|
|
"mem_used",
|
|
"flops_any",
|
|
"mem_bw",
|
|
"net_bw",
|
|
"file_bw"
|
|
]
|
|
}
|
|
},
|
|
"required": [
|
|
"job_id",
|
|
"user_id",
|
|
"project_id",
|
|
"cluster_id",
|
|
"num_nodes",
|
|
"start_time",
|
|
"stop_time",
|
|
"duration",
|
|
"nodes",
|
|
"tags",
|
|
"statistics"
|
|
]
|
|
}
|