2019-10-21 14:50:16 +02:00
|
|
|
{
|
|
|
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
|
|
"title": "Job meta data",
|
|
|
|
"description": "Meta data information of a HPC job",
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
2021-12-17 06:54:16 +01:00
|
|
|
"jobId": {
|
2019-10-21 14:50:16 +02:00
|
|
|
"description": "The unique identifier of a job",
|
2021-11-26 09:15:50 +01:00
|
|
|
"type": "integer"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"user": {
|
2019-10-21 14:50:16 +02:00
|
|
|
"description": "The unique identifier of a user",
|
|
|
|
"type": "string"
|
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"project": {
|
2019-10-21 14:50:16 +02:00
|
|
|
"description": "The unique identifier of a project",
|
|
|
|
"type": "string"
|
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"cluster": {
|
2019-10-21 14:50:16 +02:00
|
|
|
"description": "The unique identifier of a cluster",
|
|
|
|
"type": "string"
|
|
|
|
},
|
2021-11-26 09:15:50 +01:00
|
|
|
"partition": {
|
2021-12-17 06:54:16 +01:00
|
|
|
"description": "The cluster partition to which the job was submitted",
|
2021-11-26 09:15:50 +01:00
|
|
|
"type": "string"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"arrayJobId": {
|
2021-11-26 09:15:50 +01:00
|
|
|
"description": "The unique identifier of an array job",
|
|
|
|
"type": "integer"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"numNodes": {
|
2019-10-21 14:50:16 +02:00
|
|
|
"description": "Number of nodes used",
|
|
|
|
"type": "integer",
|
|
|
|
"exclusiveMinimum": 0
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"numHwthreads": {
|
2021-11-26 09:15:50 +01:00
|
|
|
"description": "Number of HWThreads used",
|
|
|
|
"type": "integer",
|
|
|
|
"exclusiveMinimum": 0
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"numAcc": {
|
2021-11-29 10:26:00 +01:00
|
|
|
"description": "Number of accelerators used",
|
2021-11-26 09:15:50 +01:00
|
|
|
"type": "integer",
|
|
|
|
"exclusiveMinimum": 0
|
|
|
|
},
|
2019-10-21 14:50:16 +02:00
|
|
|
"exclusive": {
|
|
|
|
"description": "Job uses only exclusive nodes",
|
2021-11-29 10:26:00 +01:00
|
|
|
"type": "integer"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"monitoringStatus": {
|
2021-12-02 10:45:01 +01:00
|
|
|
"description": "State of monitoring system during job run",
|
2021-12-16 08:10:36 +01:00
|
|
|
"type": "integer"
|
2021-12-02 10:45:01 +01:00
|
|
|
},
|
2021-11-26 09:15:50 +01:00
|
|
|
"smt": {
|
2021-12-02 10:45:01 +01:00
|
|
|
"description": "SMT threads used by job",
|
|
|
|
"type": "integer"
|
2021-11-26 09:15:50 +01:00
|
|
|
},
|
2019-10-21 14:50:16 +02:00
|
|
|
"walltime": {
|
|
|
|
"description": "Requested walltime of job in seconds",
|
|
|
|
"type": "integer",
|
|
|
|
"exclusiveMinimum": 0
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"jobState": {
|
2019-10-21 14:50:16 +02:00
|
|
|
"description": "Final state of job",
|
|
|
|
"type": "string",
|
|
|
|
"enum": [
|
|
|
|
"completed",
|
|
|
|
"failed",
|
|
|
|
"canceled",
|
2021-11-26 09:15:50 +01:00
|
|
|
"stopped",
|
2019-10-21 14:50:16 +02:00
|
|
|
"timeout"
|
|
|
|
]
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"startTime": {
|
2019-10-21 14:50:16 +02:00
|
|
|
"description": "Start epoch time stamp in seconds",
|
|
|
|
"type": "integer",
|
|
|
|
"exclusiveMinimum": 0
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"stopTime": {
|
2019-10-21 14:50:16 +02:00
|
|
|
"description": "Stop epoch time stamp in seconds",
|
|
|
|
"type": "integer",
|
|
|
|
"exclusiveMinimum": 0
|
|
|
|
},
|
|
|
|
"duration": {
|
|
|
|
"description": "Duration of job in seconds",
|
|
|
|
"type": "integer",
|
|
|
|
"exclusiveMinimum": 0
|
|
|
|
},
|
2021-11-26 09:15:50 +01:00
|
|
|
"resources": {
|
|
|
|
"description": "Resources used by job",
|
2019-10-21 14:50:16 +02:00
|
|
|
"type": "array",
|
|
|
|
"items": {
|
2021-11-26 09:15:50 +01:00
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"hostname": {
|
|
|
|
"type": "string"
|
|
|
|
},
|
2021-11-29 10:26:00 +01:00
|
|
|
"hwthreads": {
|
2021-11-26 09:15:50 +01:00
|
|
|
"type": "array",
|
|
|
|
"description": "List of OS processor ids",
|
|
|
|
"items": {
|
|
|
|
"type": "integer"
|
|
|
|
}
|
|
|
|
},
|
2021-11-29 10:26:00 +01:00
|
|
|
"accelerators": {
|
2021-11-26 09:15:50 +01:00
|
|
|
"type": "array",
|
2021-12-17 06:54:16 +01:00
|
|
|
"description": "List of of accelerator ids",
|
2021-11-26 09:15:50 +01:00
|
|
|
"items": {
|
2021-12-17 06:54:16 +01:00
|
|
|
"type": "integer"
|
2021-11-26 09:15:50 +01:00
|
|
|
}
|
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"configuration": {
|
|
|
|
"type": "string",
|
|
|
|
"description": "The configuration options of the node"
|
|
|
|
},
|
2021-11-26 09:15:50 +01:00
|
|
|
"required": [
|
|
|
|
"hostname"
|
|
|
|
]
|
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"minItems": 1
|
|
|
|
}
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"metaData": {
|
2021-12-02 10:45:01 +01:00
|
|
|
"description": "Additional information about the job",
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
2021-12-17 06:54:16 +01:00
|
|
|
"jobScript": {
|
2021-12-02 10:45:01 +01:00
|
|
|
"type": "string",
|
|
|
|
"description": "The batch script of the job"
|
|
|
|
},
|
|
|
|
"slurmdata": {
|
|
|
|
"type": "string",
|
|
|
|
"description": "Additional Slurm information"
|
|
|
|
}
|
|
|
|
}
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"tags": {
|
|
|
|
"description": "List of tags",
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"name": {
|
|
|
|
"type": "string"
|
|
|
|
},
|
|
|
|
"type": {
|
|
|
|
"type": "string"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": [
|
|
|
|
"name",
|
|
|
|
"type"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"uniqueItems": true
|
|
|
|
},
|
|
|
|
"statistics": {
|
|
|
|
"description": "Job statistic data",
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"mem_used": {
|
|
|
|
"description": "Memory capacity used (required)",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"cpu_load": {
|
|
|
|
"description": "CPU requested core utilization (load 1m) (required)",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2021-12-02 10:45:01 +01:00
|
|
|
},
|
2019-10-21 14:50:16 +02:00
|
|
|
"flops_any": {
|
|
|
|
"description": "Total flop rate with DP flops scaled up (required)",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"mem_bw": {
|
|
|
|
"description": "Main memory bandwidth (required)",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"net_bw": {
|
|
|
|
"description": "Total fast interconnect network bandwidth (required)",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"file_bw": {
|
|
|
|
"description": "Total file IO bandwidth (required)",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"ipc": {
|
|
|
|
"description": "Instructions executed per cycle",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"cpu_used": {
|
2021-12-02 10:45:01 +01:00
|
|
|
"description": "CPU active core utilization",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"flops_dp": {
|
|
|
|
"description": "Double precision flop rate",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"flops_sp": {
|
|
|
|
"description": "Single precision flops rate",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"rapl_power": {
|
|
|
|
"description": "CPU power consumption",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"gpu_used": {
|
|
|
|
"description": "GPU utilization",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"gpu_mem_used": {
|
|
|
|
"description": "GPU memory capacity used",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"gpu_power": {
|
|
|
|
"description": "GPU power consumption",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"clock": {
|
|
|
|
"description": "Average core frequency",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"eth_read_bw": {
|
|
|
|
"description": "Ethernet read bandwidth",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
|
|
|
"eth_write_bw": {
|
|
|
|
"description": "Ethernet write bandwidth",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"pfs_read_bw": {
|
|
|
|
"description": "Parallel file system read bandwidth",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2021-12-02 10:45:01 +01:00
|
|
|
},
|
|
|
|
"pfs_write_bw": {
|
|
|
|
"description": "Parallel file system write bandwidth",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2021-12-02 10:45:01 +01:00
|
|
|
},
|
|
|
|
"pfs_read_req": {
|
|
|
|
"description": "Parallel file system read requests",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2021-12-02 10:45:01 +01:00
|
|
|
},
|
|
|
|
"pfs_write_req": {
|
|
|
|
"description": "Parallel file system write requests",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2021-12-02 10:45:01 +01:00
|
|
|
},
|
|
|
|
"pfs_inodes": {
|
|
|
|
"description": "Parallel file system inodes used",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2021-12-02 10:45:01 +01:00
|
|
|
},
|
|
|
|
"pfs_accesses": {
|
|
|
|
"description": "Parallel file system open and close",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"pfs_fsync": {
|
|
|
|
"description": "Parallel file system fsync",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"pfs_create": {
|
|
|
|
"description": "Parallel file system create",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"pfs_open": {
|
|
|
|
"description": "Parallel file system open",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"pfs_close": {
|
|
|
|
"description": "Parallel file system close",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"pfs_seek": {
|
|
|
|
"description": "Parallel file system seek",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"fs_read_bw": {
|
|
|
|
"description": "Local file system read bandwidth",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"fs_write_bw": {
|
|
|
|
"description": "Local file system write bandwidth",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"fs_inodes": {
|
|
|
|
"description": "Local file system inodes used",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"fs_accesses": {
|
|
|
|
"description": "Local file system open and close",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"ic_rcv_packets": {
|
|
|
|
"description": "Network interconnect read packets",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"ic_send_packets": {
|
|
|
|
"description": "Network interconnect send packet",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"ic_read_bw": {
|
|
|
|
"description": "Network interconnect read bandwidth",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
},
|
2021-12-02 10:45:01 +01:00
|
|
|
"ic_write_bw": {
|
|
|
|
"description": "Network interconnect write bandwidth",
|
2021-12-17 06:54:16 +01:00
|
|
|
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
|
2019-10-21 14:50:16 +02:00
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": [
|
|
|
|
"mem_used",
|
2021-12-02 10:45:01 +01:00
|
|
|
"cpu_load",
|
2019-10-21 14:50:16 +02:00
|
|
|
"flops_any",
|
|
|
|
"mem_bw",
|
|
|
|
"net_bw",
|
|
|
|
"file_bw"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": [
|
2021-12-17 06:54:16 +01:00
|
|
|
"jobId",
|
|
|
|
"user",
|
|
|
|
"project",
|
|
|
|
"cluster",
|
|
|
|
"numNodes",
|
|
|
|
"startTime",
|
|
|
|
"stopTime",
|
2019-10-21 14:50:16 +02:00
|
|
|
"duration",
|
2021-11-26 09:15:50 +01:00
|
|
|
"resources",
|
2019-10-21 14:50:16 +02:00
|
|
|
"tags",
|
|
|
|
"statistics"
|
|
|
|
]
|
|
|
|
}
|