2022-09-20 07:05:01 +02:00
{
2024-12-19 06:14:35 +01:00
"$schema" : "https://json-schema.org/draft/2020-12/schema" ,
"$id" : "embedfs://job-meta.schema.json" ,
"title" : "Job meta data" ,
"description" : "Metadata of an HPC job" ,
"type" : "object" ,
"properties" : {
"jobId" : {
"description" : "The unique identifier of a job" ,
"type" : "integer"
} ,
"user" : {
"description" : "The unique identifier of a user" ,
"type" : "string"
} ,
"project" : {
"description" : "The unique identifier of a project" ,
"type" : "string"
} ,
"cluster" : {
"description" : "The unique identifier of a cluster" ,
"type" : "string"
} ,
"subCluster" : {
"description" : "The unique identifier of a sub cluster" ,
"type" : "string"
} ,
"partition" : {
"description" : "The Slurm partition to which the job was submitted" ,
"type" : "string"
} ,
"arrayJobId" : {
"description" : "The unique identifier of an array job" ,
"type" : "integer"
} ,
"numNodes" : {
"description" : "Number of nodes used" ,
"type" : "integer" ,
"exclusiveMinimum" : 0
} ,
"numHwthreads" : {
"description" : "Number of HWThreads used" ,
"type" : "integer" ,
"exclusiveMinimum" : 0
} ,
"numAcc" : {
"description" : "Number of accelerators used" ,
"type" : "integer" ,
"exclusiveMinimum" : 0
} ,
"exclusive" : {
"description" : "Specifies how nodes are shared. 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of same user" ,
"type" : "integer" ,
"minimum" : 0 ,
"maximum" : 2
} ,
"monitoringStatus" : {
"description" : "State of monitoring system during job run" ,
"type" : "integer"
} ,
"smt" : {
"description" : "SMT threads used by job" ,
"type" : "integer"
} ,
"walltime" : {
"description" : "Requested walltime of job in seconds" ,
"type" : "integer" ,
"exclusiveMinimum" : 0
} ,
"jobState" : {
"description" : "Final state of job" ,
"type" : "string" ,
"enum" : [
"completed" ,
"failed" ,
"cancelled" ,
"stopped" ,
"out_of_memory" ,
"timeout"
]
} ,
"startTime" : {
"description" : "Start epoch time stamp in seconds" ,
"type" : "integer" ,
"exclusiveMinimum" : 0
} ,
"duration" : {
"description" : "Duration of job in seconds" ,
"type" : "integer" ,
"exclusiveMinimum" : 0
} ,
"resources" : {
"description" : "Resources used by job (at least one entry required)" ,
"type" : "array" ,
"items" : {
"type" : "object" ,
"properties" : {
"hostname" : {
"type" : "string"
} ,
"hwthreads" : {
"type" : "array" ,
"description" : "List of OS processor ids" ,
"items" : {
"type" : "integer"
}
} ,
"accelerators" : {
"type" : "array" ,
"description" : "List of accelerator device ids" ,
"items" : {
"type" : "string"
}
} ,
"configuration" : {
"type" : "string" ,
"description" : "The configuration options of the node"
}
} ,
"required" : [
"hostname"
]
} ,
"minItems" : 1
} ,
"metaData" : {
"description" : "Additional information about the job" ,
"type" : "object" ,
"properties" : {
"jobScript" : {
"type" : "string" ,
"description" : "The batch script of the job"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"jobName" : {
"type" : "string" ,
"description" : "Slurm Job name"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"slurmInfo" : {
"type" : "string" ,
"description" : "Additional Slurm information as shown by scontrol show job"
}
}
} ,
"tags" : {
"description" : "List of tags" ,
"type" : "array" ,
"items" : {
"type" : "object" ,
"properties" : {
"name" : {
2022-09-20 07:05:01 +02:00
"type" : "string"
2024-12-19 06:14:35 +01:00
} ,
"type" : {
2022-09-20 07:05:01 +02:00
"type" : "string"
2024-12-19 06:14:35 +01:00
}
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"required" : [
"name" ,
"type"
]
} ,
"uniqueItems" : true
} ,
"statistics" : {
"description" : "Job statistic data" ,
"type" : "object" ,
"properties" : {
"mem_used" : {
"description" : "Memory capacity used (required)" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"cpu_load" : {
"description" : "CPU requested core utilization (load 1m) (required)" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"flops_any" : {
"description" : "Total flop rate with DP flops scaled up (required)" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"mem_bw" : {
"description" : "Main memory bandwidth (required)" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"net_bw" : {
"description" : "Total fast interconnect network bandwidth" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"file_bw" : {
"description" : "Total file IO bandwidth" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"ipc" : {
"description" : "Instructions executed per cycle" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"cpu_user" : {
"description" : "CPU user active core utilization (required)" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"flops_dp" : {
"description" : "Double precision flop rate" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"flops_sp" : {
"description" : "Single precision flop rate" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"rapl_power" : {
"description" : "CPU power consumption" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"acc_used" : {
"description" : "GPU utilization" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"acc_mem_used" : {
"description" : "GPU memory capacity used" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"acc_power" : {
"description" : "GPU power consumption" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"clock" : {
"description" : "Average core frequency" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"eth_read_bw" : {
"description" : "Ethernet read bandwidth" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"eth_write_bw" : {
"description" : "Ethernet write bandwidth" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
2022-09-20 07:05:01 +02:00
} ,
2024-12-19 06:14:35 +01:00
"ic_rcv_packets" : {
"description" : "Network interconnect read packets" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"ic_send_packets" : {
"description" : "Network interconnect send packets" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"ic_read_bw" : {
"description" : "Network interconnect read bandwidth" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"ic_write_bw" : {
"description" : "Network interconnect write bandwidth" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"filesystems" : {
"description" : "Array of filesystems" ,
"type" : "array" ,
"items" : {
2022-09-20 07:05:01 +02:00
"type" : "object" ,
"properties" : {
2024-12-19 06:14:35 +01:00
"name" : {
"type" : "string"
} ,
"type" : {
"type" : "string" ,
"enum" : [
"nfs" ,
"lustre" ,
"gpfs" ,
"nvme" ,
"ssd" ,
"hdd" ,
"beegfs"
]
} ,
"read_bw" : {
"description" : "File system read bandwidth" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"write_bw" : {
"description" : "File system write bandwidth" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"read_req" : {
"description" : "File system read requests" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"write_req" : {
"description" : "File system write requests" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"inodes" : {
"description" : "File system inodes" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"accesses" : {
"description" : "File system open and close" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"fsync" : {
"description" : "File system fsync" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"create" : {
"description" : "File system create" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"open" : {
"description" : "File system open" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"close" : {
"description" : "File system close" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
} ,
"seek" : {
"description" : "File system seek" ,
"$ref" : "embedfs://job-metric-statistics.schema.json"
}
2022-09-20 07:05:01 +02:00
} ,
"required" : [
2024-12-19 06:14:35 +01:00
"name" ,
"type" ,
"read_bw" ,
"write_bw"
2022-09-20 07:05:01 +02:00
]
2024-12-19 06:14:35 +01:00
} ,
"minItems" : 1
2022-09-20 07:05:01 +02:00
}
2024-12-19 06:14:35 +01:00
} ,
"required" : [
"cpu_user" ,
"cpu_load" ,
"mem_used" ,
"flops_any" ,
"mem_bw"
]
}
} ,
"required" : [
"jobId" ,
"user" ,
"project" ,
"cluster" ,
"subCluster" ,
"numNodes" ,
"exclusive" ,
"startTime" ,
"jobState" ,
"duration" ,
"resources" ,
"statistics"
]
2022-09-20 07:05:01 +02:00
}