Introduce resolution variants and stats

Add statistics across topology
This commit is contained in:
Jan Eitzinger 2021-12-17 09:38:41 +01:00
parent fc69a29b34
commit 183002ab19
4 changed files with 176 additions and 40 deletions

View File

@ -47,6 +47,10 @@
"description": "Metric unit", "description": "Metric unit",
"type": "string" "type": "string"
}, },
"scope": {
"description": "Native measurement resolution",
"type": "string"
},
"timestep": { "timestep": {
"description": "Frequency of timeseries points", "description": "Frequency of timeseries points",
"type": "integer" "type": "integer"

View File

@ -6,155 +6,163 @@
"properties": { "properties": {
"mem_used": { "mem_used": {
"description": "Memory capacity used (required)", "description": "Memory capacity used (required)",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json" "type": "object",
"properties": {
"node": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}
}
}, },
"flops_any": { "flops_any": {
"description": "Total flop rate with DP flops scaled up (required)", "description": "Total flop rate with DP flops scaled up (required)",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json" "properties": {
"node": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
},
"socket": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
},
"memoryDomain": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
},
"core": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
},
"hwthread": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}
}
}, },
"mem_bw": { "mem_bw": {
"description": "Main memory bandwidth (required)", "description": "Main memory bandwidth (required)",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json" "properties": {
"node": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
},
"socket": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
},
"memoryDomain": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}
}
}, },
"net_bw": { "net_bw": {
"description": "Total fast interconnect network bandwidth (required)", "description": "Total fast interconnect network bandwidth (required)",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json" "type": "object",
"properties": {
"node": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}
}
}, },
"file_bw": { "file_bw": {
"description": "Total file IO bandwidth (required)", "description": "Total file IO bandwidth (required)",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json" "type": "object",
"properties": {
"node": {
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}
}
}, },
"ipc": { "ipc": {
"description": "Instructions executed per cycle", "description": "Instructions executed per cycle",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"cpu_used": { "cpu_used": {
"description": "CPU active core utilization", "description": "CPU active core utilization",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"cpu_load": { "cpu_load": {
"description": "CPU requested core utilization (load 1m)", "description": "CPU requested core utilization (load 1m)",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"flops_dp": { "flops_dp": {
"description": "Double precision flop rate", "description": "Double precision flop rate",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"flops_sp": { "flops_sp": {
"description": "Single precision flop rate", "description": "Single precision flop rate",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"vectorization_ratio": { "vectorization_ratio": {
"description": "Fraction of arithmetic instructions using SIMD instructions", "description": "Fraction of arithmetic instructions using SIMD instructions",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"cpu_power": { "cpu_power": {
"description": "CPU power consumption", "description": "CPU power consumption",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"mem_power": { "mem_power": {
"description": "Memory power consumption", "description": "Memory power consumption",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"acc_utilization": { "acc_utilization": {
"description": "GPU utilization", "description": "GPU utilization",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"acc_mem_used": { "acc_mem_used": {
"description": "GPU memory capacity used", "description": "GPU memory capacity used",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"acc_power": { "acc_power": {
"description": "GPU power consumption", "description": "GPU power consumption",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"clock": { "clock": {
"description": "Average core frequency", "description": "Average core frequency",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"eth_read_bw": { "eth_read_bw": {
"description": "Ethernet read bandwidth", "description": "Ethernet read bandwidth",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"eth_write_bw": { "eth_write_bw": {
"description": "Ethernet write bandwidth", "description": "Ethernet write bandwidth",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_read_bw": { "pfs_read_bw": {
"description": "Parallel file system read bandwidth", "description": "Parallel file system read bandwidth",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_write_bw": { "pfs_write_bw": {
"description": "Parallel file system write bandwidth", "description": "Parallel file system write bandwidth",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_read_req": { "pfs_read_req": {
"description": "Parallel file system read requests", "description": "Parallel file system read requests",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_write_req": { "pfs_write_req": {
"description": "Parallel file system write requests", "description": "Parallel file system write requests",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_inodes": { "pfs_inodes": {
"description": "Parallel file system inodes used", "description": "Parallel file system inodes used",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_accesses": { "pfs_accesses": {
"description": "Parallel file system open and close", "description": "Parallel file system open and close",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_fsync": { "pfs_fsync": {
"description": "Parallel file system fsync", "description": "Parallel file system fsync",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_create": { "pfs_create": {
"description": "Parallel file system create", "description": "Parallel file system create",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_open": { "pfs_open": {
"description": "Parallel file system open", "description": "Parallel file system open",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_close": { "pfs_close": {
"description": "Parallel file system close", "description": "Parallel file system close",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"pfs_seek": { "pfs_seek": {
"description": "Parallel file system seek", "description": "Parallel file system seek",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"fs_read_bw": { "fs_read_bw": {
"description": "Local file system read bandwidth", "description": "Local file system read bandwidth",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"fs_write_bw": { "fs_write_bw": {
"description": "Local file system write bandwidth", "description": "Local file system write bandwidth",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"fs_inodes": { "fs_inodes": {
"description": "Local file system inodes used", "description": "Local file system inodes used",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"fs_accesses": { "fs_accesses": {
"description": "Local file system open and close", "description": "Local file system open and close",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"ic_rcv_packets": { "ic_rcv_packets": {
"description": "Network interconnect read packets", "description": "Network interconnect read packets",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"ic_send_packets": { "ic_send_packets": {
"description": "Network interconnect send packets", "description": "Network interconnect send packets",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"ic_read_bw": { "ic_read_bw": {
"description": "Network interconnect read bandwidth", "description": "Network interconnect read bandwidth",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
}, },
"ic_write_bw": { "ic_write_bw": {
"description": "Network interconnect write bandwidth", "description": "Network interconnect write bandwidth",
"#ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
} }
}, },
"required": [ "required": [

View File

@ -21,8 +21,8 @@
"type": "string" "type": "string"
}, },
"partition": { "partition": {
"description": "The cluster partition to which the job was submitted", "description": "The cluster partition id to which the job was submitted",
"type": "string" "type": "integer"
}, },
"arrayJobId": { "arrayJobId": {
"description": "The unique identifier of an array job", "description": "The unique identifier of an array job",

View File

@ -41,6 +41,129 @@
} }
} }
}, },
"statisticsSeries": {
"type": "object",
"description": "Statistics series across topology",
"properties": {
"min": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"max": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"mean": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"percentiles": {
"type": "object",
"properties": {
"10": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"20": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"30": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"40": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"50": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"60": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"70": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"80": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"90": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"25": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"75": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
}
}
}
}
},
"series": { "series": {
"type": "array", "type": "array",
"items": { "items": {
@ -54,6 +177,7 @@
}, },
"statistics": { "statistics": {
"type": "object", "type": "object",
"description": "Statistics across time dimension",
"properties": { "properties": {
"avg": { "avg": {
"description": "Series average", "description": "Series average",