2021-12-17 15:49:22 +01:00
# Custom scalar for points in time. Used by Job.startTime, TimeRange, TimeRangeOutput
# and the `from`/`to` arguments of Query.nodeMetrics. Its serialization format is not specified in this schema.
scalar Time
2022-02-22 09:19:59 +01:00
# Custom scalar; in the visible schema it is only used by Job.metaData.
# NOTE(review): presumably carries untyped/arbitrary data — confirm against the resolver.
scalar Any
2021-12-17 15:49:22 +01:00
# Custom scalar used as the element type of numeric data arrays
# (Series.data, StatsSeries, MetricFootprints.data, TimeWeights).
# NOTE(review): those lists are declared `[NullableFloat!]`, so a missing value is presumably
# encoded by the scalar itself rather than by GraphQL null — confirm.
scalar NullableFloat
# Custom scalar for the scope of a metric (MetricConfig.scope, JobMetricWithName.scope,
# GlobalMetricListItem.scope and the `scopes` query arguments). Allowed values are not listed in this schema.
scalar MetricScope
# Custom scalar for the state of a job (Job.state, JobFilter.state). Allowed values are not listed in this schema.
scalar JobState
2021-03-31 07:23:48 +02:00
# A job as exposed by the API. Most fields are non-null;
# `concurrentJobs`, `footprint`, `metaData` and `userData` may be null.
type Job {
2021-12-17 15:49:22 +01:00
id : ID !
jobId : Int ! # NOTE(review): presumably the scheduler's job number, as opposed to the opaque `id` — confirm
user : String !
project : String !
cluster : String !
2022-03-14 09:08:02 +01:00
subCluster : String !
2021-12-17 15:49:22 +01:00
startTime : Time !
duration : Int ! # unit is not stated in this schema
2022-03-14 09:08:02 +01:00
walltime : Int ! # unit is not stated in this schema
2021-12-17 15:49:22 +01:00
numNodes : Int !
numHWThreads : Int !
numAcc : Int !
2021-12-16 13:17:48 +01:00
SMT : Int !
2021-12-17 15:49:22 +01:00
exclusive : Int ! # an Int, not a Boolean; the meaning of the values is not defined here
partition : String !
arrayJobId : Int !
monitoringStatus : Int ! # meaning of the values is not defined in this schema
state : JobState !
tags : [ Tag ! ] ! # non-null list of non-null tags
2022-01-07 09:44:34 +01:00
resources : [ Resource ! ] ! # non-null list of non-null resources
2023-05-16 12:42:06 +02:00
concurrentJobs : JobLinkResultList # nullable
2024-07-11 17:23:59 +02:00
footprint : [ FootprintValue ] # nullable list whose elements may also be null (unlike the other lists in this type)
2022-03-15 11:04:54 +01:00
metaData : Any # nullable
userData : User # nullable
2021-12-16 13:17:48 +01:00
}
2023-04-28 12:34:40 +02:00
# Minimal reference to a job (both identifiers, nothing else).
# Element type of JobLinkResultList.items.
type JobLink {
  id: ID!
  jobId: Int!
}
2021-12-17 15:49:22 +01:00
# A cluster with its name, its scheduler partitions and its sub-clusters. Returned by Query.clusters.
type Cluster {
name : String !
2022-03-14 10:24:27 +01:00
partitions : [ String ! ] ! # Slurm partitions
subClusters : [ SubCluster ! ] ! # Hardware partitions/subclusters
2021-12-16 13:17:48 +01:00
}
2022-03-14 10:18:56 +01:00
# One sub-cluster of a Cluster: node information, processor layout, flop-rate and
# memory-bandwidth values, topology and per-metric configuration. All fields are non-null.
type SubCluster {
2021-12-17 15:49:22 +01:00
name : String !
2022-03-14 10:18:56 +01:00
nodes : String ! # a single string, not a list — NOTE(review): presumably a node-list expression; confirm
2023-03-24 15:10:23 +01:00
numberOfNodes : Int !
2021-12-17 15:49:22 +01:00
processorType : String !
socketsPerNode : Int !
coresPerSocket : Int !
threadsPerCore : Int !
2023-03-10 12:13:40 +01:00
flopRateScalar : MetricValue !
flopRateSimd : MetricValue !
memoryBandwidth : MetricValue !
2021-12-17 15:49:22 +01:00
topology : Topology !
2024-07-05 16:16:01 +02:00
metricConfig : [ MetricConfig ! ] !
footprint : [ String ! ] ! # NOTE(review): presumably the names of the metrics that make up the job footprint — confirm
2021-10-26 10:22:02 +02:00
}
2024-07-11 17:23:59 +02:00
# Element type of Job.footprint: a name, a `stat` string and a float value, all non-null.
# NOTE(review): `stat` presumably names the statistic the value represents — confirm.
type FootprintValue {
  name: String!
  stat: String!
  value: Float!
}
2023-03-10 12:13:40 +01:00
# A numeric value with its unit and an optional name.
# Used for SubCluster.flopRateScalar, SubCluster.flopRateSimd and SubCluster.memoryBandwidth.
type MetricValue {
2024-06-28 16:49:02 +02:00
name : String # nullable
2023-03-10 12:13:40 +01:00
unit : Unit !
value : Float !
}
2021-12-17 15:49:22 +01:00
# Hardware topology of a sub-cluster (SubCluster.topology). Every field is a nullable list.
# NOTE(review): the Ints are presumably hardware-thread IDs, grouped per socket/memory domain/die/core — confirm.
type Topology {
node : [ Int ! ] # flat list of Ints
socket : [ [ Int ! ] ! ] # list of Int lists
memoryDomain : [ [ Int ! ] ! ] # list of Int lists
die : [ [ Int ! ] ! ] # list of Int lists
core : [ [ Int ! ] ! ] # list of Int lists
accelerators : [ Accelerator ! ]
2021-09-23 11:45:20 +02:00
}
2021-04-21 10:12:19 +02:00
2021-12-17 15:49:22 +01:00
# Element type of Topology.accelerators; all three fields are non-null strings.
type Accelerator {
id : String !
type : String !
model : String !
2021-04-07 09:19:21 +02:00
}
2022-05-12 09:20:38 +02:00
# Element type of MetricConfig.subClusters. Only `name` is required; the threshold
# fields and `remove` are nullable.
# NOTE(review): presumably overrides the MetricConfig values for the named sub-cluster — confirm.
type SubClusterConfig {
name : String !
2023-03-21 11:51:11 +01:00
peak : Float
normal : Float
caution : Float
alert : Float
remove : Boolean
2022-05-12 09:20:38 +02:00
}
2021-12-17 15:49:22 +01:00
# Configuration of one metric; element type of SubCluster.metricConfig.
# `normal` and `lowerIsBetter` are nullable, every other field is required.
type MetricConfig {
2022-03-21 13:28:21 +01:00
name : String !
2023-03-21 11:51:11 +01:00
unit : Unit !
2022-03-21 13:28:21 +01:00
scope : MetricScope !
2023-03-21 11:51:11 +01:00
aggregation : String ! # allowed values are not listed in this schema
2022-03-21 13:28:21 +01:00
timestep : Int ! # unit is not stated in this schema
2023-03-20 16:27:36 +01:00
peak : Float !
2023-03-21 11:51:11 +01:00
normal : Float # nullable, unlike peak/caution/alert
2023-03-20 16:27:36 +01:00
caution : Float !
alert : Float !
2024-07-11 16:58:12 +02:00
lowerIsBetter : Boolean # nullable
2023-03-21 11:51:11 +01:00
subClusters : [ SubClusterConfig ! ] !
2021-12-17 15:49:22 +01:00
}
# A tag. Listed by Query.tags and Job.tags; created, deleted and attached to jobs via the Mutation fields.
type Tag {
id : ID !
type : String !
name : String !
2024-08-01 18:59:24 +02:00
scope : String ! # allowed values are not listed in this schema
2021-04-22 15:00:54 +02:00
}
2022-01-07 09:44:34 +01:00
# Element type of Job.resources: a host name plus optional details. Only `hostname` is required.
type Resource {
2021-12-17 15:49:22 +01:00
hostname : String !
hwthreads : [ Int ! ] # nullable list
2022-05-04 09:22:55 +02:00
accelerators : [ String ! ] # nullable list
2021-12-17 15:49:22 +01:00
configuration : String # nullable
}
# Pairs a JobMetric with its metric name and scope.
# Returned by Query.jobMetrics and used as the element type of NodeMetrics.metrics.
type JobMetricWithName {
2021-12-20 10:48:58 +01:00
name : String !
2023-03-24 14:43:37 +01:00
scope : MetricScope !
2021-12-20 10:48:58 +01:00
metric : JobMetric !
2021-04-22 15:00:54 +02:00
}
2021-04-07 09:19:21 +02:00
# Data of one metric. Only `timestep` is required; `unit`, `series` and `statisticsSeries` may each be null.
type JobMetric {
2022-10-06 18:04:47 +02:00
unit : Unit # nullable
2021-12-17 15:49:22 +01:00
timestep : Int ! # unit is not stated in this schema
2022-01-07 09:44:34 +01:00
series : [ Series ! ] # nullable list of individual data series
statisticsSeries : StatsSeries # nullable
2021-04-07 09:19:21 +02:00
}
2021-12-17 15:49:22 +01:00
# One data series of a JobMetric, identified by `hostname` and an optional `id`.
type Series {
hostname : String !
2023-03-22 19:21:11 +01:00
id : String # nullable
2021-12-17 15:49:22 +01:00
statistics : MetricStatistics # nullable
data : [ NullableFloat ! ] !
2021-04-07 09:19:21 +02:00
}
2022-10-06 18:04:47 +02:00
# Unit description used by MetricValue, MetricConfig, JobMetric and GlobalMetricListItem:
# a required `base` string and an optional `prefix` string.
type Unit {
  base: String!
  prefix: String
}
2021-12-17 15:49:22 +01:00
# Average, minimum and maximum attached to a Series (Series.statistics); all three are non-null.
type MetricStatistics {
2022-01-27 09:29:53 +01:00
avg : Float !
min : Float !
max : Float !
2021-04-07 09:19:21 +02:00
}
2021-12-17 15:49:22 +01:00
# Arrays of mean, median, min and max values; the type of JobMetric.statisticsSeries.
# All four arrays are non-null lists of NullableFloat.
type StatsSeries {
2024-05-08 16:17:42 +02:00
mean : [ NullableFloat ! ] !
median : [ NullableFloat ! ] !
min : [ NullableFloat ! ] !
max : [ NullableFloat ! ] !
2021-04-14 18:53:18 +02:00
}
2021-10-26 10:22:02 +02:00
# Values for a single metric; element type of Footprints.metrics.
type MetricFootprints {
2022-03-16 16:11:28 +01:00
metric : String ! # NOTE(review): presumably the metric name — confirm
data : [ NullableFloat ! ] !
}
# Result type of Query.jobsFootprints: time weights plus one MetricFootprints entry per metric.
type Footprints {
2023-08-24 11:52:36 +02:00
timeWeights : TimeWeights !
2022-03-16 16:11:28 +01:00
metrics : [ MetricFootprints ! ] !
2021-10-26 10:22:02 +02:00
}
2023-08-24 11:52:36 +02:00
# Type of Footprints.timeWeights: three non-null NullableFloat arrays
# holding node hours, accelerator hours and core hours.
type TimeWeights {
  nodeHours: [NullableFloat!]!
  accHours: [NullableFloat!]!
  coreHours: [NullableFloat!]!
}
2021-10-26 10:22:02 +02:00
# Grouping options; type of the `groupBy` argument of Query.jobsStatistics.
enum Aggregate {
  USER
  PROJECT
  CLUSTER
}
2023-08-29 17:38:17 +02:00
# Sort keys; type of the `sortBy` argument of Query.jobsStatistics.
# Each value corresponds by name to one of the total* fields of JobsStatistics.
enum SortByAggregate {
  TOTALWALLTIME
  TOTALJOBS
  TOTALNODES
  TOTALNODEHOURS
  TOTALCORES
  TOTALCOREHOURS
  TOTALACCS
  TOTALACCHOURS
}
2021-10-26 10:22:02 +02:00
2021-12-09 16:25:48 +01:00
# Element type of the Query.nodeMetrics result: the metrics of one host, together with its sub-cluster name.
type NodeMetrics {
2022-03-24 14:34:42 +01:00
host : String !
subCluster : String !
metrics : [ JobMetricWithName ! ] !
2021-12-09 16:25:48 +01:00
}
2024-07-11 11:09:14 +02:00
# Element type of GlobalMetricListItem.availability: a cluster name and a list of sub-cluster names.
type ClusterSupport {
  cluster: String!
  subClusters: [String!]!
}
# Element type of Query.globalMetrics: one metric with its unit, scope and the clusters listed in `availability`.
type GlobalMetricListItem {
name : String !
unit : Unit !
scope : MetricScope !
2024-07-20 08:59:07 +02:00
footprint : String # nullable; meaning of the string is not defined in this schema
2024-07-11 11:09:14 +02:00
availability : [ ClusterSupport ! ] !
}
2022-02-19 10:28:29 +01:00
# Name/count pair; element type of the Query.allocatedNodes result.
type Count {
  name: String!
  count: Int!
}
2022-03-15 11:04:54 +01:00
# User record returned by Query.user and Job.userData; all three fields are non-null strings.
type User {
  username: String!
  name: String!
  email: String!
}
2024-07-12 13:21:19 +02:00
# Element type of JobFilter.metricStats: a metric name together with a required float range.
input MetricStatItem {
  metricName: String!
  range: FloatRange!
}
2021-03-31 07:23:48 +02:00
# Root query type of the schema.
type Query {
2021-10-26 10:22:02 +02:00
clusters : [ Cluster ! ] ! # List of all clusters
2022-01-31 15:16:05 +01:00
tags : [ Tag ! ] ! # List of all tags
2024-07-11 11:09:14 +02:00
globalMetrics : [ GlobalMetricListItem ! ] !
2021-10-26 10:22:02 +02:00
2022-03-15 11:04:54 +01:00
user ( username : String ! ) : User # result is nullable
2022-03-24 16:08:47 +01:00
allocatedNodes ( cluster : String ! ) : [ Count ! ] !
2022-03-15 11:04:54 +01:00
2021-10-26 10:22:02 +02:00
job ( id : ID ! ) : Job # result is nullable
2024-08-16 14:50:31 +02:00
jobMetrics ( id : ID ! , metrics : [ String ! ] , scopes : [ MetricScope ! ] , resolution : Int ) : [ JobMetricWithName ! ] ! # only `id` is required
2022-03-16 16:11:28 +01:00
jobsFootprints ( filter : [ JobFilter ! ] , metrics : [ String ! ] ! ) : Footprints # `metrics` is required; result is nullable
2021-10-26 10:22:02 +02:00
jobs ( filter : [ JobFilter ! ] , page : PageRequest , order : OrderByInput ) : JobResultList ! # all arguments are optional
2023-12-01 13:22:01 +01:00
jobsStatistics ( filter : [ JobFilter ! ] , metrics : [ String ! ] , page : PageRequest , sortBy : SortByAggregate , groupBy : Aggregate ) : [ JobsStatistics ! ] ! # all arguments are optional
2021-10-26 10:22:02 +02:00
rooflineHeatmap ( filter : [ JobFilter ! ] ! , rows : Int ! , cols : Int ! , minX : Float ! , minY : Float ! , maxX : Float ! , maxY : Float ! ) : [ [ Float ! ] ! ] ! # all arguments required; returns a 2D list of Floats — NOTE(review): presumably rows x cols, confirm
2021-12-09 16:25:48 +01:00
2022-03-24 14:34:42 +01:00
nodeMetrics ( cluster : String ! , nodes : [ String ! ] , scopes : [ MetricScope ! ] , metrics : [ String ! ] , from : Time ! , to : Time ! ) : [ NodeMetrics ! ] ! # `cluster`, `from` and `to` are required
2021-03-31 07:23:48 +02:00
}
2021-05-21 09:30:15 +02:00
# Root mutation type: tag management and configuration updates.
type Mutation {
2024-08-01 18:59:24 +02:00
createTag ( type : String ! , name : String ! , scope : String ! ) : Tag ! # returns a Tag
2021-05-21 09:30:15 +02:00
deleteTag ( id : ID ! ) : ID ! # returns an ID
2021-12-17 15:49:22 +01:00
addTagsToJob ( job : ID ! , tagIds : [ ID ! ] ! ) : [ Tag ! ] ! # returns a list of tags
removeTagsFromJob ( job : ID ! , tagIds : [ ID ! ] ! ) : [ Tag ! ] ! # returns a list of tags
2021-09-23 11:45:20 +02:00
updateConfiguration ( name : String ! , value : String ! ) : String # result is nullable
}
2021-12-17 15:49:22 +01:00
# Output-side integer range with two required bounds (IntRange is the input-side type).
type IntRangeOutput {
  from: Int!
  to: Int!
}
# Output-side time range with two required bounds (TimeRange is the input-side type).
type TimeRangeOutput {
  from: Time!
  to: Time!
}
2021-09-23 11:45:20 +02:00
2021-03-31 07:23:48 +02:00
# Filter criteria for jobs; every field is optional. The job queries accept a list of these
# (`filter : [ JobFilter ! ]`). How multiple fields or list entries are combined is not defined in this schema.
input JobFilter {
2021-10-26 10:22:02 +02:00
tags : [ ID ! ]
jobId : StringInput # matched as a string although Job.jobId is an Int
2022-01-27 10:40:48 +01:00
arrayJobId : Int
2021-12-16 13:17:48 +01:00
user : StringInput
project : StringInput
2023-01-11 16:25:02 +01:00
jobName : StringInput # Job has no `jobName` field in this schema
2021-12-16 13:17:48 +01:00
cluster : StringInput
2022-01-27 10:40:48 +01:00
partition : StringInput
2021-10-26 10:22:02 +02:00
duration : IntRange
2022-01-27 12:32:28 +01:00
2022-03-02 10:48:52 +01:00
minRunningFor : Int # unit is not stated in this schema
2022-01-27 12:32:28 +01:00
numNodes : IntRange
numAccelerators : IntRange
numHWThreads : IntRange
2021-10-26 10:22:02 +02:00
startTime : TimeRange
2021-12-17 15:49:22 +01:00
state : [ JobState ! ]
2024-07-12 13:21:19 +02:00
metricStats : [ MetricStatItem ! ]
2023-04-28 12:34:40 +02:00
exclusive : Int
2023-06-28 13:35:41 +02:00
node : StringInput
2021-03-31 07:23:48 +02:00
}
# Sort specification for Query.jobs. `field` and `type` must be supplied; `order` defaults to ASC.
input OrderByInput {
field : String !
2024-07-22 15:41:33 +02:00
type : String ! , # allowed values are not listed here; the trailing comma is insignificant in GraphQL
2021-10-26 10:22:02 +02:00
order : SortDirectionEnum ! = ASC
2021-03-31 07:23:48 +02:00
}
# Sort direction for OrderByInput.order (which defaults to ASC).
enum SortDirectionEnum {
  DESC
  ASC
}
# String matching operators used by the JobFilter string fields. Every operator is optional;
# how several operators set at once are combined is not defined in this schema.
input StringInput {
2021-10-26 10:22:02 +02:00
eq : String
2023-04-28 12:34:40 +02:00
neq : String
2021-10-26 10:22:02 +02:00
contains : String
2021-03-31 07:23:48 +02:00
startsWith : String
2021-10-26 10:22:02 +02:00
endsWith : String
2023-02-20 10:20:08 +01:00
in : [ String ! ] # the only list-valued operator
2021-03-31 07:23:48 +02:00
}
2021-12-17 15:49:22 +01:00
# Integer range input; both bounds are required.
# Whether the bounds are inclusive is not defined in this schema.
input IntRange {
  from: Int!
  to: Int!
}
# Time range input; unlike IntRange and FloatRange, both bounds are optional.
input TimeRange {
  from: Time
  to: Time
}
2021-03-31 07:23:48 +02:00
2024-07-12 13:21:19 +02:00
# Float range input with two required bounds; used by MetricStatItem.range.
input FloatRange {
  from: Float!
  to: Float!
}
2021-03-31 07:23:48 +02:00
# Result type of Query.jobs: the jobs plus paging information. Only `items` is non-null.
type JobResultList {
2021-10-26 10:22:02 +02:00
items : [ Job ! ] !
2021-03-31 07:23:48 +02:00
offset : Int # nullable
2021-10-26 10:22:02 +02:00
limit : Int # nullable
count : Int # nullable
2024-03-26 16:27:04 +01:00
hasNextPage : Boolean # nullable
2021-03-31 07:23:48 +02:00
}
2023-04-28 12:34:40 +02:00
# Type of Job.concurrentJobs: a list of job links with an optional count and query string.
type JobLinkResultList {
2023-06-28 13:35:41 +02:00
listQuery : String # nullable; NOTE(review): presumably a query string for listing these jobs in a UI — confirm
2023-04-28 12:34:40 +02:00
items : [ JobLink ! ] !
count : Int # nullable
}
2021-03-31 07:23:48 +02:00
# One histogram point: an integer `value` and the `count` recorded for it.
# Element type of the hist* fields of JobsStatistics (except histMetrics).
type HistoPoint {
  count: Int!
  value: Int!
}
2023-12-01 13:22:01 +01:00
# Histogram of one metric; element type of JobsStatistics.histMetrics.
type MetricHistoPoints {
metric : String !
2023-12-05 15:30:40 +01:00
unit : String ! # a plain string here, not the Unit type used elsewhere
2024-07-22 15:41:33 +02:00
stat : String # nullable
2023-12-05 11:59:01 +01:00
data : [ MetricHistoPoint ! ] # nullable list
}
# One bin of a metric histogram (MetricHistoPoints.data). Only `count` is required.
type MetricHistoPoint {
2023-12-08 12:03:04 +01:00
bin : Int # nullable
2023-12-05 11:59:01 +01:00
count : Int !
2023-12-08 12:03:04 +01:00
min : Int # nullable
max : Int # nullable
2023-12-01 13:22:01 +01:00
}
2021-03-31 07:23:48 +02:00
# Aggregated statistics over the jobs matched by Query.jobsStatistics, optionally per group (`groupBy`).
# All fields are non-null.
type JobsStatistics {
2021-10-26 10:22:02 +02:00
id : ID ! # If `groupBy` was used, ID of the user/project/cluster
2023-06-07 14:13:59 +02:00
name : String ! # If user statistics: given name of the account (ID) owner
2023-06-09 09:09:41 +02:00
totalJobs : Int ! # Number of jobs
runningJobs : Int ! # Number of running jobs
shortJobs : Int ! # Number of jobs with a duration below a threshold (NOTE(review): the threshold is not defined in this schema — confirm where it is configured)
2021-10-26 10:22:02 +02:00
totalWalltime : Int ! # Sum of the duration of all matched jobs in hours
2023-08-25 17:38:25 +02:00
totalNodes : Int ! # Sum of the nodes of all matched jobs
2023-06-09 09:09:41 +02:00
totalNodeHours : Int ! # Sum of the node hours of all matched jobs
2023-08-25 17:38:25 +02:00
totalCores : Int ! # Sum of the cores of all matched jobs
2021-10-26 10:22:02 +02:00
totalCoreHours : Int ! # Sum of the core hours of all matched jobs
2023-08-25 17:38:25 +02:00
totalAccs : Int ! # Sum of the accelerators of all matched jobs
2023-06-09 09:09:41 +02:00
totalAccHours : Int ! # Sum of the accelerator (e.g. GPU) hours of all matched jobs
2022-03-31 09:44:26 +02:00
histDuration : [ HistoPoint ! ] ! # value: hour, count: number of jobs with a rounded duration of value
2021-10-26 10:22:02 +02:00
histNumNodes : [ HistoPoint ! ] ! # value: number of nodes, count: number of jobs with that number of nodes
2023-08-29 14:01:01 +02:00
histNumCores : [ HistoPoint ! ] ! # value: number of cores, count: number of jobs with that number of cores
histNumAccs : [ HistoPoint ! ] ! # value: number of accelerators, count: number of jobs with that number of accelerators
2023-12-01 13:22:01 +01:00
histMetrics : [ MetricHistoPoints ! ] ! # metric: metric name, data: array of histogram points (value: metric average bin, count: number of jobs with that metric average)
2021-09-23 11:45:20 +02:00
}
2021-03-31 07:23:48 +02:00
# Paging input for Query.jobs and Query.jobsStatistics; both fields are required.
input PageRequest {
2021-09-23 11:45:20 +02:00
itemsPerPage : Int !
2021-10-26 10:22:02 +02:00
page : Int ! # NOTE(review): whether page numbering starts at 0 or 1 is not defined in this schema — confirm
2021-03-31 07:23:48 +02:00
}