Update README and add json job formats

This commit is contained in:
Jan Eitzinger 2019-10-21 14:50:16 +02:00
parent ac03ecf944
commit 0883fa200a
8 changed files with 597 additions and 2 deletions

View File

@ -1,2 +1,12 @@
# File-specification
Specification of file formats and directory structures
# ClusterCockpit Standardization of APIs and data formats
## RESTful API specifications
The specifications are defined following the [OpenAPI Initiative](https://www.openapis.org) format standard.
## Data exchange formats
Main data exchange format is JSON.
The specifications are defined following the [JSON schema](https://json-schema.org) format standard.
## Database schemas

View File

@ -0,0 +1,50 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "HPC Cluster description",
"description": "Meta data information of an HPC cluster",
"type": "object",
"properties":{
"cluster_id": {
"description": "The unique identifier of a cluster",
"type": "string"
},
"processor_type": {
"description": "Processor type",
"type": "string"
},
"sockets_per_node": {
"description": "Number of sockets per node",
"type": "integer"
},
"cores_per_socket": {
"description": "Number of cores per socket",
"type": "integer"
},
"threads_per_core": {
"description": "Number of SMT threads per core",
"type": "integer"
},
"flop_rate_scalar": {
"description": "Theoretical node peak flop rate for scalar code in GFlops/s",
"type": "integer"
},
"flop_rate_simd": {
"description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
"type": "integer"
},
"memory_bandwidth": {
"description": "Theoretical node peak memory bandwidth in GB/s",
"type": "integer"
}
},
"required":[
"cluster_id",
"processor_type",
"sockets_per_node",
"cores_per_socket",
"threads_per_core",
"flop_rate_scalar",
"flop_rate_simd",
"memory_bandwidth"
]
}

View File

@ -0,0 +1,135 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Job metric data",
"description": "Metric data of an HPC job",
"type": "object",
"properties": {
"mem_used": {
"description": "Memory capacity used (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"flops_any": {
"description": "Total flop rate with DP flops scaled up (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"mem_bw": {
"description": "Main memory bandwidth (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"net_bw": {
"description": "Total fast interconnect network bandwidth (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"file_bw": {
"description": "Total file IO bandwidth (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"ipc": {
"description": "Instructions executed per cycle",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"cpu_used": {
"description": "CPU core utilization",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"flops_dp": {
"description": "Double precision flop rate",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"flops_sp": {
"description": "Single precision flops rate",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"rapl_power": {
"description": "CPU power consumption",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"gpu_used": {
"description": "GPU utilization",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"gpu_mem_used": {
"description": "GPU memory capacity used",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"gpu_power": {
"description": "GPU power consumption",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"clock": {
"description": "Average core frequency",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"eth_read_bw": {
"description": "Ethernet read bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"eth_write_bw": {
"description": "Ethernet write bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_read_bw": {
"description": "Lustre read bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_write_bw": {
"description": "Lustre write bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_read_req": {
"description": "Lustre read requests",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_write_req": {
"description": "Lustre write requests",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_inodes": {
"description": "Lustre inodes used",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_accesses": {
"description": "Lustre open and close",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_fsync": {
"description": "Lustre fsync",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_create": {
"description": "Lustre create",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_open": {
"description": "Lustre open",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_close": {
"description": "Lustre close",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"lustre_seek": {
"description": "Lustre seek",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"ib_read_bw": {
"description": "Infiniband read bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"ib_write_bw": {
"description": "Infiniband write bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
},
"ib_congestion": {
"description": "Infiniband congestion",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json"
}
},
"required": [
"mem_used",
"flops_any",
"mem_bw",
"net_bw",
"file_bw"
]
}

View File

@ -0,0 +1,238 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Job meta data",
"description": "Meta data information of an HPC job",
"type": "object",
"properties": {
"job_id": {
"description": "The unique identifier of a job",
"type": "string"
},
"user_id": {
"description": "The unique identifier of a user",
"type": "string"
},
"project_id": {
"description": "The unique identifier of a project",
"type": "string"
},
"cluster_id": {
"description": "The unique identifier of a cluster",
"type": "string"
},
"num_nodes": {
"description": "Number of nodes used",
"type": "integer",
"exclusiveMinimum": 0
},
"exclusive": {
"description": "Job uses only exclusive nodes",
"type": "boolean"
},
"walltime": {
"description": "Requested walltime of job in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"job_state": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"canceled",
"timeout"
]
},
"start_time": {
"description": "Start epoch time stamp in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"stop_time": {
"description": "Stop epoch time stamp in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"duration": {
"description": "Duration of job in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"nodes": {
"description": "List of nodes",
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"uniqueItems": true
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"type": {
"type": "string"
}
},
"required": [
"name",
"type"
]
},
"uniqueItems": true
},
"statistics": {
"description": "Job statistic data",
"type": "object",
"properties": {
"mem_used": {
"description": "Memory capacity used (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"flops_any": {
"description": "Total flop rate with DP flops scaled up (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"mem_bw": {
"description": "Main memory bandwidth (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"net_bw": {
"description": "Total fast interconnect network bandwidth (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"file_bw": {
"description": "Total file IO bandwidth (required)",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"ipc": {
"description": "Instructions executed per cycle",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"cpu_used": {
"description": "CPU core utilization",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"flops_dp": {
"description": "Double precision flop rate",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"flops_sp": {
"description": "Single precision flops rate",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"rapl_power": {
"description": "CPU power consumption",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"gpu_used": {
"description": "GPU utilization",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"gpu_mem_used": {
"description": "GPU memory capacity used",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"gpu_power": {
"description": "GPU power consumption",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"clock": {
"description": "Average core frequency",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"eth_read_bw": {
"description": "Ethernet read bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"eth_write_bw": {
"description": "Ethernet write bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_read_bw": {
"description": "Lustre read bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_write_bw": {
"description": "Lustre write bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_read_req": {
"description": "Lustre read requests",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_write_req": {
"description": "Lustre write requests",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_inodes": {
"description": "Lustre inodes used",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_accesses": {
"description": "Lustre open and close",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_fsync": {
"description": "Lustre fsync",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_create": {
"description": "Lustre create",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_open": {
"description": "Lustre open",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_close": {
"description": "Lustre close",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"lustre_seek": {
"description": "Lustre seek",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"ib_read_bw": {
"description": "Infiniband read bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"ib_write_bw": {
"description": "Infiniband write bandwidth",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
},
"ib_congestion": {
"description": "Infiniband congestion",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json"
}
},
"required": [
"mem_used",
"flops_any",
"mem_bw",
"net_bw",
"file_bw"
]
}
},
"required": [
"job_id",
"user_id",
"project_id",
"cluster_id",
"num_nodes",
"start_time",
"stop_time",
"duration",
"nodes",
"tags",
"statistics"
]
}

View File

@ -0,0 +1,83 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Job metric data",
"description": "Metric data of an HPC job",
"type": "object",
"properties": {
"unit": {
"description": "",
"type": "string"
},
"scope": {
"description": "",
"type": "string",
"enum": [
"node",
"cpu",
"socket"
]
},
"timestep": {
"description": "Measurement interval in seconds",
"type": "integer"
},
"series": {
"description": "",
"type": "array",
"items": {
"type": "object",
"properties": {
"node_id": {
"type": "string"
},
"id": {
"type": "integer"
},
"statistics": {
"type": "object",
"properties": {
"avg": {
"description": "Series average",
"type": "number",
"minimum": 0
},
"min": {
"description": "Series minimum",
"type": "number",
"minimum": 0
},
"max": {
"description": "Series maximum",
"type": "number",
"minimum": 0
}
},
"required": [
"avg",
"min",
"max"
]
},
"data": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 1
}
},
"required": [
"node_id",
"data"
]
}
}
},
"required": [
"unit",
"scope",
"timestep",
"series"
]
}

View File

@ -0,0 +1,33 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Job statistics",
"description": "Format specification for job metric statistics",
"type": "object",
"properties": {
"unit": {
"description": "Metric unit",
"#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/unit.schema.json"
},
"avg": {
"description": "Job metric average",
"type": "number",
"minimum": 0
},
"min": {
"description": "Job metric minimum",
"type": "number",
"minimum": 0
},
"max": {
"description": "Job metric maximum",
"type": "number",
"minimum": 0
}
},
"required": [
"unit",
"avg",
"min",
"max"
]
}

View File

@ -0,0 +1,36 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Metric unit",
"description": "Format specification for job metric units",
"type": "object",
"properties": {
"base_unit": {
"description": "Metric base unit",
"type": "string",
"enum": [
"B",
"F",
"B/s",
"F/s",
"CPI",
"IPC",
"Hz"
]
},
"prefix": {
"description": "Unit prefix",
"type": "string",
"enum": [
"K",
"M",
"G",
"T",
"P",
"E"
]
}
},
"required": [
"base_unit"
]
}

View File

@ -0,0 +1,10 @@
-- Job table: one row per stored job.
-- `id` is the table's own integer primary key; `job_id` is a separate TEXT
-- identifier column and is not declared unique here.
-- NOTE(review): column names resemble the fields of the job meta data JSON
-- schema (identifiers, epoch-style time columns, summary statistics) --
-- confirm units and value ranges against the writer before relying on them.
CREATE TABLE job (
    id            INTEGER PRIMARY KEY,

    -- Identifier columns, all stored as text.
    job_id        TEXT,
    user_id       TEXT,
    project_id    TEXT,
    cluster_id    TEXT,

    -- Timing columns, stored as integers.
    start_time    INTEGER,
    stop_time     INTEGER,
    duration      INTEGER,
    walltime      INTEGER,

    job_state     TEXT,

    -- Node allocation.
    num_nodes     INTEGER,
    node_list     TEXT,

    -- Integer flag column; presumably 0/1 -- TODO confirm.
    has_profile   INTEGER,

    -- Per-job summary statistics (one maximum, four averages).
    mem_used_max  REAL,
    flops_any_avg REAL,
    mem_bw_avg    REAL,
    ib_bw_avg     REAL,
    file_bw_avg   REAL
);
-- Tag table: each row stores a tag as a (tag_type, tag_name) pair of text
-- values, keyed by an integer primary key.
CREATE TABLE tag (
    id       INTEGER PRIMARY KEY,
    tag_type TEXT,
    tag_name TEXT
);
-- Junction table linking job rows to tag rows; the composite primary key
-- allows each (job, tag) pairing to be stored at most once.
-- `job_id` here references job.id (the integer key), not the TEXT column
-- job.job_id.
-- Both key columns are declared NOT NULL explicitly: some engines (notably
-- SQLite) do not imply NOT NULL for non-INTEGER PRIMARY KEY columns, which
-- would otherwise admit link rows that point at nothing.
-- Deleting a job or a tag cascades to its link rows; updates to a referenced
-- key take NO ACTION.
-- NOTE(review): on SQLite, foreign keys are only enforced when
-- `PRAGMA foreign_keys = ON` is set on the connection -- confirm in the caller.
CREATE TABLE jobtag (
    job_id INTEGER NOT NULL,
    tag_id INTEGER NOT NULL,
    PRIMARY KEY (job_id, tag_id),
    FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE ON UPDATE NO ACTION,
    FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE ON UPDATE NO ACTION
);