{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Job meta data",
"description": "Meta data information of a HPC job",
"type": "object",
"properties": {
"job_id": {
"description": "The unique identifier of a job",
"type": "integer"
},
"user": {
"description": "The unique identifier of a user",
"type": "string"
},
"project": {
"description": "The unique identifier of a project",
"type": "string"
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string"
},
"partition": {
"description": "The queue to which the job was submitted",
"type": "string"
},
"array_job_id": {
"description": "The unique identifier of an array job",
"type": "integer"
},
"num_nodes": {
"description": "Number of nodes used",
"type": "integer",
"exclusiveMinimum": 0
},
"num_hwthreads": {
"description": "Number of HWThreads used",
"type": "integer",
"exclusiveMinimum": 0
},
"num_acc": {
"description": "Number of accelerators used",
"type": "integer",
"exclusiveMinimum": 0
},
"exclusive": {
"description": "Job uses only exclusive nodes",
"type": "integer"
},
"monitoring_status": {
"description": "State of monitoring system during job run",
"type": "integer"
},
"smt": {
"description": "SMT threads used by job",
"type": "integer"
},
"walltime": {
"description": "Requested walltime of job in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"job_state": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"canceled",
"stopped",
"timeout"
]
},
"start_time": {
"description": "Start epoch time stamp in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"stop_time": {
"description": "Stop epoch time stamp in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"duration": {
"description": "Duration of job in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"resources": {
  "description": "Resources used by job",
  "type": "array",
  "items": {
    "type": "object",
    "properties": {
      "hostname": {
        "type": "string"
      },
      "hwthreads": {
        "type": "array",
        "description": "List of OS processor ids",
        "items": {
          "type": "integer"
        }
      },
      "accelerators": {
        "type": "array",
        "description": "List of accelerator devices",
        "items": {
          "type": "object",
          "properties": {
            "id": {
              "type": "string",
              "description": "The unique device id"
            },
            "type": {
              "type": "string",
              "description": "The accelerator type",
              "enum": [
                "Nvidia GPU",
                "AMD GPU",
                "Intel GPU"
              ]
            },
            "model": {
              "type": "string",
              "description": "The accelerator model"
            }
          },
          "required": [
            "id",
            "type",
            "model"
          ]
        }
      },
      "configuration": {
        "type": "string",
        "description": "The configuration options of the node"
      }
    },
    "required": [
      "hostname"
    ]
  },
  "minItems": 1
},
"meta_data": {
"description": "Additional information about the job",
"type": "object",
"properties": {
"job_script": {
"type": "string",
"description": "The batch script of the job"
},
"slurmdata": {
"type": "string",
"description": "Additional Slurm information"
}
}
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"type": {
"type": "string"
}
},
"required": [
"name",
"type"
]
},
"uniqueItems": true
},
"statistics": {
  "description": "Job statistic data",
  "type": "object",
  "properties": {
    "mem_used": {
      "description": "Memory capacity used (required)",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "cpu_load": {
      "description": "CPU requested core utilization (load 1m) (required)",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "flops_any": {
      "description": "Total flop rate with DP flops scaled up (required)",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "mem_bw": {
      "description": "Main memory bandwidth (required)",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "net_bw": {
      "description": "Total fast interconnect network bandwidth (required)",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "file_bw": {
      "description": "Total file IO bandwidth (required)",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "ipc": {
      "description": "Instructions executed per cycle",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "cpu_used": {
      "description": "CPU active core utilization",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "flops_dp": {
      "description": "Double precision flop rate",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "flops_sp": {
      "description": "Single precision flop rate",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "rapl_power": {
      "description": "CPU power consumption",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "gpu_used": {
      "description": "GPU utilization",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "gpu_mem_used": {
      "description": "GPU memory capacity used",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "gpu_power": {
      "description": "GPU power consumption",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "clock": {
      "description": "Average core frequency",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "eth_read_bw": {
      "description": "Ethernet read bandwidth",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "eth_write_bw": {
      "description": "Ethernet write bandwidth",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_read_bw": {
      "description": "Parallel file system read bandwidth",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_write_bw": {
      "description": "Parallel file system write bandwidth",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_read_req": {
      "description": "Parallel file system read requests",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_write_req": {
      "description": "Parallel file system write requests",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_inodes": {
      "description": "Parallel file system inodes used",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_accesses": {
      "description": "Parallel file system open and close",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_fsync": {
      "description": "Parallel file system fsync",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_create": {
      "description": "Parallel file system create",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_open": {
      "description": "Parallel file system open",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_close": {
      "description": "Parallel file system close",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "pfs_seek": {
      "description": "Parallel file system seek",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "fs_read_bw": {
      "description": "Local file system read bandwidth",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "fs_write_bw": {
      "description": "Local file system write bandwidth",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "fs_inodes": {
      "description": "Local file system inodes used",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "fs_accesses": {
      "description": "Local file system open and close",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "ic_rcv_packets": {
      "description": "Network interconnect read packets",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "ic_send_packets": {
      "description": "Network interconnect send packets",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "ic_read_bw": {
      "description": "Network interconnect read bandwidth",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    },
    "ic_write_bw": {
      "description": "Network interconnect write bandwidth",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
    }
  },
  "required": [
    "mem_used",
    "cpu_load",
    "flops_any",
    "mem_bw",
    "net_bw",
    "file_bw"
  ]
}
},
"required": [
  "job_id",
  "user",
  "project",
  "cluster",
  "num_nodes",
  "start_time",
  "stop_time",
  "duration",
  "resources",
  "tags",
  "statistics"
]
}