diff --git a/README.md b/README.md index 1437bfc..be2d577 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,12 @@ -# File-specification -Specification of file formats and directory structures +# ClusterCockpit Standardization of APIs and data formats + +## RESTful API specifications + +The specifications are defined following the [OpenAPI Initiative](https://www.openapis.org) format standard. + +## Data exchange formats + +The main data exchange format is JSON. +The specifications are defined following the [JSON Schema](https://json-schema.org) format standard. + +## Database schemas diff --git a/dataformat/json/cluster.schema.json b/dataformat/json/cluster.schema.json new file mode 100644 index 0000000..43baae1 --- /dev/null +++ b/dataformat/json/cluster.schema.json @@ -0,0 +1,50 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "HPC Cluster description", + "description": "Meta data information of an HPC cluster", + "type": "object", + "properties":{ + "cluster_id": { + "description": "The unique identifier of a cluster", + "type": "string" + }, + "processor_type": { + "description": "Processor type", + "type": "string" + }, + "sockets_per_node": { + "description": "Number of sockets per node", + "type": "integer" + }, + "cores_per_socket": { + "description": "Number of cores per socket", + "type": "integer" + }, + "threads_per_core": { + "description": "Number of SMT threads per core", + "type": "integer" + }, + "flop_rate_scalar": { + "description": "Theoretical node peak flop rate for scalar code in GFlop/s", + "type": "integer" + }, + "flop_rate_simd": { + "description": "Theoretical node peak flop rate for SIMD code in GFlop/s", + "type": "integer" + }, + "memory_bandwidth": { + "description": "Theoretical node peak memory bandwidth in GB/s", + "type": "integer" + } + }, + "required":[ + "cluster_id", + "processor_type", + "sockets_per_node", + "cores_per_socket", + "threads_per_core", + "flop_rate_scalar", + "flop_rate_simd", +
"memory_bandwidth" + ] +} diff --git a/dataformat/json/job-data.schema.json b/dataformat/json/job-data.schema.json new file mode 100644 index 0000000..37f66be --- /dev/null +++ b/dataformat/json/job-data.schema.json @@ -0,0 +1,135 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Job metric data", + "description": "Meta data information of a HPC job", + "type": "object", + "properties": { + "mem_used": { + "description": "Memory capacity used (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "flops_any": { + "description": "Total flop rate with DP flops scaled up (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "mem_bw": { + "description": "Main memory bandwidth (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "net_bw": { + "description": "Total fast interconnect network bandwidth (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "file_bw": { + "description": "Total file IO bandwidth (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "ipc": { + "description": "Instructions executed per cycle", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "cpu_used": { + "description": "CPU core utilization", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "flops_dp": { + "description": "Double precision flop rate", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "flops_sp": { + "description": "Single precision flops rate", + "#ref": 
"https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "rapl_power": { + "description": "CPU power consumption", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "gpu_used": { + "description": "GPU utilization", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "gpu_mem_used": { + "description": "GPU memory capacity used", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "gpu_power": { + "description": "GPU power consumption", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "clock": { + "description": "Average core frequency", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "eth_read_bw": { + "description": "Ethernet read bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "eth_write_bw": { + "description": "Ethernet write bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_read_bw": { + "description": "Lustre read bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_write_bw": { + "description": "Lustre write bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_read_req": { + "description": "Lustre read requests", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_write_req": { + "description": "Lustre write requests", + "#ref": 
"https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_inodes": { + "description": "Lustre inodes used", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_accesses": { + "description": "Lustre open and close", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_fsync": { + "description": "Lustre fsync", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_create": { + "description": "Lustre create", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_open": { + "description": "Lustre open", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_close": { + "description": "Lustre close", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "lustre_seek": { + "description": "Lustre seek", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "ib_read_bw": { + "description": "Infiniband read bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "ib_write_bw": { + "description": "Infiniband write bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + }, + "ib_congestion": { + "description": "Infiniband congestion", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-metric-data.schema.json" + } + }, + "required": [ + "mem_used", + "flops_any", + "mem_bw", + "net_bw", + "file_bw" + ] +} diff --git a/dataformat/json/job-meta.schema.json b/dataformat/json/job-meta.schema.json new file mode 
100644 index 0000000..2434f43 --- /dev/null +++ b/dataformat/json/job-meta.schema.json @@ -0,0 +1,238 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Job meta data", + "description": "Meta data information of a HPC job", + "type": "object", + "properties": { + "job_id": { + "description": "The unique identifier of a job", + "type": "string" + }, + "user_id": { + "description": "The unique identifier of a user", + "type": "string" + }, + "project_id": { + "description": "The unique identifier of a project", + "type": "string" + }, + "cluster_id": { + "description": "The unique identifier of a cluster", + "type": "string" + }, + "num_nodes": { + "description": "Number of nodes used", + "type": "integer", + "exclusiveMinimum": 0 + }, + "exclusive": { + "description": "Job uses only exclusive nodes", + "type": "boolean" + }, + "walltime": { + "description": "Requested walltime of job in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "job_state": { + "description": "Final state of job", + "type": "string", + "enum": [ + "completed", + "failed", + "canceled", + "timeout" + ] + }, + "start_time": { + "description": "Start epoch time stamp in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "stop_time": { + "description": "Stop epoch time stamp in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "duration": { + "description": "Duration of job in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "nodes": { + "description": "List of nodes", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": true + }, + "tags": { + "description": "List of tags", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "type": { + "type": "string" + } + }, + "required": [ + "name", + "type" + ] + }, + "uniqueItems": true + }, + "statistics": { + "description": "Job statistic data", + "type": "object", + "properties": { + 
"mem_used": { + "description": "Memory capacity used (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "flops_any": { + "description": "Total flop rate with DP flops scaled up (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "mem_bw": { + "description": "Main memory bandwidth (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "net_bw": { + "description": "Total fast interconnect network bandwidth (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "file_bw": { + "description": "Total file IO bandwidth (required)", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "ipc": { + "description": "Instructions executed per cycle", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "cpu_used": { + "description": "CPU core utilization", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "flops_dp": { + "description": "Double precision flop rate", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "flops_sp": { + "description": "Single precision flops rate", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "rapl_power": { + "description": "CPU power consumption", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "gpu_used": { + "description": "GPU utilization", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "gpu_mem_used": { + "description": "GPU memory 
capacity used", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "gpu_power": { + "description": "GPU power consumption", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "clock": { + "description": "Average core frequency", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "eth_read_bw": { + "description": "Ethernet read bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "eth_write_bw": { + "description": "Ethernet write bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_read_bw": { + "description": "Lustre read bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_write_bw": { + "description": "Lustre write bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_read_req": { + "description": "Lustre read requests", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_write_req": { + "description": "Lustre write requests", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_inodes": { + "description": "Lustre inodes used", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_accesses": { + "description": "Lustre open and close", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_fsync": { + "description": "Lustre fsync", + "#ref": 
"https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_create": { + "description": "Lustre create", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_open": { + "description": "Lustre open", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_close": { + "description": "Lustre close", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "lustre_seek": { + "description": "Lustre seek", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "ib_read_bw": { + "description": "Infiniband read bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "ib_write_bw": { + "description": "Infiniband write bandwidth", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + }, + "ib_congestion": { + "description": "Infiniband congestion", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-statistic.schema.json" + } + }, + "required": [ + "mem_used", + "flops_any", + "mem_bw", + "net_bw", + "file_bw" + ] + } + }, + "required": [ + "job_id", + "user_id", + "project_id", + "cluster_id", + "num_nodes", + "start_time", + "stop_time", + "duration", + "nodes", + "tags", + "statistics" + ] +} diff --git a/dataformat/json/job-metric-data.schema.json b/dataformat/json/job-metric-data.schema.json new file mode 100644 index 0000000..8d0cbb5 --- /dev/null +++ b/dataformat/json/job-metric-data.schema.json @@ -0,0 +1,83 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Job metric data", + "description": "Metric data of a HPC job", + "type": "object", + "properties": { + "unit": { + "description": "", + "type": "string" 
+ }, + "scope": { + "description": "", + "type": "string", + "enum": [ + "node", + "cpu", + "socket" + ] + }, + "timestep": { + "description": "Measurement interval in seconds", + "type": "integer" + }, + "series": { + "description": "", + "type": "array", + "items": { + "type": "object", + "properties": { + "node_id": { + "type": "string" + }, + "id": { + "type": "integer" + }, + "statistics": { + "type": "object", + "properties": { + "avg": { + "description": "Series average", + "type": "number", + "minimum": 0 + }, + "min": { + "description": "Series minimum", + "type": "number", + "minimum": 0 + }, + "max": { + "description": "Series maximum", + "type": "number", + "minimum": 0 + } + }, + "required": [ + "avg", + "min", + "max" + ] + }, + "data": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 1 + } + }, + "required": [ + "node_id", + "data" + ] + } + } + }, + "required": [ + "unit", + "scope", + "timestep", + "series" + ] +} diff --git a/dataformat/json/job-statistic.schema.json b/dataformat/json/job-statistic.schema.json new file mode 100644 index 0000000..04d5e93 --- /dev/null +++ b/dataformat/json/job-statistic.schema.json @@ -0,0 +1,33 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Job statistics", + "description": "Format specification for job metric statistics", + "type": "object", + "properties": { + "unit": { + "description": "Metric unit", + "#ref": "https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/unit.schema.json" + }, + "avg": { + "description": "Job metric average", + "type": "number", + "minimum": 0 + }, + "min": { + "description": "Job metric minimum", + "type": "number", + "minimum": 0 + }, + "max": { + "description": "Job metric maximum", + "type": "number", + "minimum": 0 + } + }, + "required": [ + "unit", + "avg", + "min", + "max" + ] +} diff --git a/dataformat/json/unit.schema.json b/dataformat/json/unit.schema.json new file mode 100644 index 
0000000..1917361
--- /dev/null
+++ b/dataformat/json/unit.schema.json
@@ -0,0 +1,36 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Metric unit",
+  "description": "Format specification for job metric units",
+  "type": "object",
+  "properties": {
+    "base_unit": {
+      "description": "Metric base unit",
+      "type": "string",
+      "enum": [
+        "B",
+        "F",
+        "B/s",
+        "F/s",
+        "CPI",
+        "IPC",
+        "Hz"
+      ]
+    },
+    "prefix": {
+      "description": "Unit prefix",
+      "type": "string",
+      "enum": [
+        "K",
+        "M",
+        "G",
+        "T",
+        "P",
+        "E"
+      ]
+    }
+  },
+  "required": [
+    "base_unit"
+  ]
+}
diff --git a/dataformat/sql/jobDB-sqlite.sql b/dataformat/sql/jobDB-sqlite.sql
new file mode 100644
index 0000000..c9727c8
--- /dev/null
+++ b/dataformat/sql/jobDB-sqlite.sql
@@ -0,0 +1,44 @@
+-- Job database schema for SQLite.
+-- SQLite only enforces the FOREIGN KEY clauses below on connections that ran
+-- "PRAGMA foreign_keys = ON".
+
+-- One row per job. Column names follow job-meta.schema.json where a counterpart
+-- exists; units are presumably the same as there (seconds) -- TODO confirm.
+CREATE TABLE job (
+    id            INTEGER PRIMARY KEY,
+    job_id        TEXT,
+    user_id       TEXT,
+    project_id    TEXT,
+    cluster_id    TEXT,
+    start_time    INTEGER,
+    stop_time     INTEGER,
+    duration      INTEGER,
+    walltime      INTEGER,
+    job_state     TEXT,
+    num_nodes     INTEGER,
+    node_list     TEXT,
+    has_profile   INTEGER,
+    mem_used_max  REAL,
+    flops_any_avg REAL,
+    mem_bw_avg    REAL,
+    ib_bw_avg     REAL,  -- NOTE(review): JSON schemas call this metric net_bw; confirm the mapping
+    file_bw_avg   REAL
+);
+
+-- Tag that can be attached to jobs.
+CREATE TABLE tag (
+    id       INTEGER PRIMARY KEY,
+    tag_type TEXT,
+    tag_name TEXT
+);
+
+-- Junction table linking jobs to tags (many-to-many).
+-- NOT NULL is explicit because SQLite permits NULL in PRIMARY KEY columns of
+-- ordinary rowid tables.
+CREATE TABLE jobtag (
+    job_id INTEGER NOT NULL,
+    tag_id INTEGER NOT NULL,
+    PRIMARY KEY (job_id, tag_id),
+    FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE ON UPDATE NO ACTION,
+    FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE ON UPDATE NO ACTION
+);