Update to cc-backend version 1.0.0

Jan Eitzinger 2023-06-27 15:08:03 +02:00
parent 1de04dd30d
commit f0bccc8229
10 changed files with 2628 additions and 320 deletions

View File

@@ -4,7 +4,7 @@
 "title": "HPC cluster description",
 "description": "Meta data information of a HPC cluster",
 "type": "object",
-"properties":{
+"properties": {
 "name": {
 "description": "The unique identifier of a cluster",
 "type": "string"
@@ -14,7 +14,7 @@
 "type": "array",
 "items": {
 "type": "object",
-"properties":{
+"properties": {
 "name": {
 "description": "Metric name",
 "type": "string"
@@ -39,12 +39,28 @@
 "avg"
 ]
 },
+"peak": {
+"description": "Metric peak threshold (Upper metric limit)",
+"type": "number"
+},
+"normal": {
+"description": "Metric normal threshold",
+"type": "number"
+},
+"caution": {
+"description": "Metric caution threshold (Suspicious but does not require immediate action)",
+"type": "number"
+},
+"alert": {
+"description": "Metric alert threshold (Requires immediate action)",
+"type": "number"
+},
 "subClusters": {
 "description": "Array of cluster hardware partition metric thresholds",
 "type": "array",
 "items": {
 "type": "object",
-"properties":{
+"properties": {
 "name": {
 "description": "Hardware partition name",
 "type": "string"
@@ -60,13 +76,13 @@
 },
 "alert": {
 "type": "number"
+},
+"remove": {
+"type": "boolean"
 }
 },
 "required": [
-"name",
-"peak",
-"caution",
-"alert"
+"name"
 ]
 }
 }
@@ -75,7 +91,12 @@
 "name",
 "unit",
 "scope",
-"timestep"
+"timestep",
+"aggregation",
+"peak",
+"normal",
+"caution",
+"alert"
 ]
 },
 "minItems": 1
@@ -85,7 +106,7 @@
 "type": "array",
 "items": {
 "type": "object",
-"properties":{
+"properties": {
 "name": {
 "description": "Hardware partition name",
 "type": "string"
@@ -108,15 +129,42 @@
 },
 "flopRateScalar": {
 "description": "Theoretical node peak flop rate for scalar code in GFlops/s",
-"type": "integer"
+"type": "object",
+"properties": {
+"unit": {
+"description": "Metric unit",
+"$ref": "embedfs://unit.schema.json"
+},
+"value": {
+"type": "number"
+}
+}
 },
 "flopRateSimd": {
 "description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
-"type": "integer"
+"type": "object",
+"properties": {
+"unit": {
+"description": "Metric unit",
+"$ref": "embedfs://unit.schema.json"
+},
+"value": {
+"type": "number"
+}
+}
 },
 "memoryBandwidth": {
 "description": "Theoretical node peak memory bandwidth in GB/s",
-"type": "integer"
+"type": "object",
+"properties": {
+"unit": {
+"description": "Metric unit",
+"$ref": "embedfs://unit.schema.json"
+},
+"value": {
+"type": "number"
+}
+}
 },
 "nodes": {
 "description": "Node list expression",
@@ -125,7 +173,7 @@
 "topology": {
 "description": "Node topology",
 "type": "object",
-"properties":{
+"properties": {
 "node": {
 "description": "HwTread lists of node",
 "type": "array",
@@ -205,15 +253,16 @@
 }
 }
 },
-"required":[
+"required": [
 "node",
 "socket",
 "memoryDomain"
 ]
 }
 },
-"required":[
+"required": [
 "name",
+"nodes",
 "topology",
 "processorType",
 "socketsPerNode",
@@ -227,7 +276,7 @@
 "minItems": 1
 }
 },
-"required":[
+"required": [
 "name",
 "metricConfig",
 "subClusters"

View File

@@ -86,8 +86,8 @@
 },
 "minProperties": 1
 },
-"cpu_used": {
-"description": "CPU active core utilization",
+"cpu_user": {
+"description": "CPU user active core utilization",
 "properties": {
 "node": {
 "$ref": "job-metric-data.schema.json"
@@ -479,7 +479,8 @@
 ]
 },
 "required": [
-"cpu_used",
+"cpu_user",
+"cpu_load",
 "mem_used",
 "flops_any",
 "mem_bw",

View File

@@ -193,8 +193,8 @@
 "description": "Instructions executed per cycle",
 "$ref": "job-metric-statistics.schema.json"
 },
-"cpu_used": {
-"description": "CPU active core utilization",
+"cpu_user": {
+"description": "CPU user active core utilization",
 "$ref": "job-metric-statistics.schema.json"
 },
 "flops_dp": {
@@ -326,7 +326,8 @@
 }
 },
 "required": [
-"cpu_used",
+"cpu_user",
+"cpu_load",
 "mem_used",
 "flops_any",
 "mem_bw"
@@ -338,6 +339,7 @@
 "user",
 "project",
 "cluster",
+"subCluster",
 "numNodes",
 "exclusive",
 "startTime",

View File

@@ -193,7 +193,7 @@
 },
 "data": {
 "type": "array",
-"items": {
+"contains": {
 "type": "number",
 "minimum": 0
 },

View File

@@ -15,7 +15,6 @@
 "F/s",
 "CPI",
 "IPC",
-"load",
 "Hz",
 "W",
 "°C",

View File

@@ -26,32 +26,43 @@ type Job {
 state: JobState!
 tags: [Tag!]!
 resources: [Resource!]!
+concurrentJobs: JobLinkResultList
 metaData: Any
 userData: User
 }
+type JobLink {
+id: ID!
+jobId: Int!
+}
 type Cluster {
 name: String!
 partitions: [String!]! # Slurm partitions
 metricConfig: [MetricConfig!]!
-filterRanges: FilterRanges!
 subClusters: [SubCluster!]! # Hardware partitions/subclusters
 }
 type SubCluster {
 name: String!
 nodes: String!
+numberOfNodes: Int!
 processorType: String!
 socketsPerNode: Int!
 coresPerSocket: Int!
 threadsPerCore: Int!
-flopRateScalar: Int!
-flopRateSimd: Int!
-memoryBandwidth: Int!
+flopRateScalar: MetricValue!
+flopRateSimd: MetricValue!
+memoryBandwidth: MetricValue!
 topology: Topology!
 }
+type MetricValue {
+unit: Unit!
+value: Float!
+}
 type Topology {
 node: [Int!]
 socket: [[Int!]!]
@@ -67,15 +78,26 @@ type Accelerator {
 model: String!
 }
+type SubClusterConfig {
+name: String!
+peak: Float
+normal: Float
+caution: Float
+alert: Float
+remove: Boolean
+}
 type MetricConfig {
 name: String!
-unit: String!
+unit: Unit!
 scope: MetricScope!
+aggregation: String!
 timestep: Int!
 peak: Float!
-normal: Float!
+normal: Float
 caution: Float!
 alert: Float!
+subClusters: [SubClusterConfig!]!
 }
@@ -87,18 +109,18 @@ type Tag {
 type Resource {
 hostname: String!
 hwthreads: [Int!]
-accelerators: [Int!]
+accelerators: [String!]
 configuration: String
 }
 type JobMetricWithName {
 name: String!
+scope: MetricScope!
 metric: JobMetric!
 }
 type JobMetric {
-unit: String!
-scope: MetricScope!
+unit: Unit
 timestep: Int!
 series: [Series!]
 statisticsSeries: StatsSeries
@@ -106,11 +128,16 @@ type JobMetric {
 type Series {
 hostname: String!
-id: Int
+id: String
 statistics: MetricStatistics
 data: [NullableFloat!]!
 }
+type Unit {
+base: String!
+prefix: String
+}
 type MetricStatistics {
 avg: Float!
 min: Float!
@@ -134,9 +161,11 @@ type Footprints {
 }
 enum Aggregate { USER, PROJECT, CLUSTER }
+enum Weights { NODE_COUNT, NODE_HOURS }
 type NodeMetrics {
 host: String!
+subCluster: String!
 metrics: [JobMetricWithName!]!
 }
@@ -156,6 +185,7 @@ type Query {
 tags: [Tag!]! # List of all tags
 user(username: String!): User
+allocatedNodes(cluster: String!): [Count!]!
 job(id: ID!): Job
 jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
@@ -163,11 +193,11 @@ type Query {
 jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
 jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]!
-jobsCount(filter: [JobFilter]!, groupBy: Aggregate!, limit: Int): [Count!]!
+jobsCount(filter: [JobFilter]!, groupBy: Aggregate!, weight: Weights, limit: Int): [Count!]!
 rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
-nodeMetrics(cluster: String!, partition: String, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
+nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
 }
@@ -182,18 +212,13 @@ type Mutation {
 type IntRangeOutput { from: Int!, to: Int! }
 type TimeRangeOutput { from: Time!, to: Time! }
-type FilterRanges {
-duration: IntRangeOutput!
-numNodes: IntRangeOutput!
-startTime: TimeRangeOutput!
-}
 input JobFilter {
 tags: [ID!]
 jobId: StringInput
 arrayJobId: Int
 user: StringInput
 project: StringInput
+jobName: StringInput
 cluster: StringInput
 partition: StringInput
 duration: IntRange
@@ -210,6 +235,12 @@ input JobFilter {
 memBwAvg: FloatRange
 loadAvg: FloatRange
 memUsedMax: FloatRange
+exclusive: Int
+sharedNode: StringInput
+selfJobId: StringInput
+selfStartTime: Time
+selfDuration: Int
 }
@@ -224,9 +255,11 @@ enum SortDirectionEnum {
 input StringInput {
 eq: String
+neq: String
 contains: String
 startsWith: String
 endsWith: String
+in: [String!]
 }
@@ -240,6 +273,11 @@ type JobResultList {
 count: Int
 }
+type JobLinkResultList {
+items: [JobLink!]!
+count: Int
+}
 type HistoPoint {
 count: Int!
 value: Int!
@@ -247,11 +285,15 @@ type HistoPoint {
 type JobsStatistics {
 id: ID! # If `groupBy` was used, ID of the user/project/cluster
-totalJobs: Int! # Number of jobs that matched
-shortJobs: Int! # Number of jobs with a duration of less than 2 minutes
+name: String! # if User-Statistics: Given Name of Account (ID) Owner
+totalJobs: Int! # Number of jobs
+runningJobs: Int! # Number of running jobs
+shortJobs: Int! # Number of jobs with a duration of less than duration
 totalWalltime: Int! # Sum of the duration of all matched jobs in hours
+totalNodeHours: Int! # Sum of the node hours of all matched jobs
 totalCoreHours: Int! # Sum of the core hours of all matched jobs
-histWalltime: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
+totalAccHours: Int! # Sum of the gpu hours of all matched jobs
+histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
 histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
 }
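
The moved scope field and the new Unit type show up directly in queries: JobMetricWithName now carries the scope, and JobMetric returns a structured unit instead of a string. A hedged Go sketch posting such a query; the /query endpoint path, the CC_JWT environment variable, the NODE scope value and the job ID are assumptions for illustration, not taken from this diff.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

// Query exercising fields introduced in this commit: scope on
// JobMetricWithName and the structured Unit on JobMetric.
// The job ID and metric list are placeholders; NODE is assumed to be a
// valid MetricScope value.
const query = `query {
  jobMetrics(id: "1234", metrics: ["flops_any"], scopes: [NODE]) {
    name
    scope
    metric {
      unit { base prefix }
      timestep
      series { hostname id statistics { avg min } }
    }
  }
}`

func main() {
	body, _ := json.Marshal(map[string]string{"query": query})
	// Endpoint, host and token handling are assumptions for this sketch.
	req, _ := http.NewRequest("POST", "http://localhost:8080/query", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+os.Getenv("CC_JWT"))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	var result map[string]any
	json.NewDecoder(resp.Body).Decode(&result)
	fmt.Printf("%+v\n", result)
}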

View File

@@ -1,221 +0,0 @@
#
# ClusterCockpit's API spec can be exported via:
# docker exec -it cc-php php bin/console api:openapi:export --yaml
#
# This spec is written by hand and hopefully up to date with the API.
#
openapi: 3.0.3
info:
  title: 'ClusterCockpit REST API'
  description: 'API for batch job control'
  version: 0.0.2
servers:
  - url: /
    description: ''
paths:
  '/api/jobs/':
    get:
      operationId: 'getJobs'
      summary: 'List all jobs'
      description: 'Get a list of all jobs. Filters can be applied using query parameters.'
      parameters:
        - name: state
          in: query
          schema:
            type: string
            enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
        - name: cluster
          in: query
          schema: { type: string }
        - name: start-time
          description: 'Syntax: "<from>-<to>", where <from> and <to> are unix timestamps in seconds'
          in: query
          schema: { type: string }
        - name: page
          in: query
          schema: { type: integer }
        - name: items-per-page
          in: query
          schema: { type: integer }
        - name: with-metadata
          in: query
          schema: { type: boolean }
      responses:
        200:
          description: 'Array of jobs'
          content:
            'application/json':
              schema:
                type: object
                properties:
                  jobs:
                    type: array
                    items:
                      $ref: '#/components/schemas/Job'
        400:
          description: 'Bad Request'
  '/api/jobs/tag_job/{id}':
    post:
      operationId: 'tagJob'
      summary: 'Add a tag to a job'
      parameters:
        - name: id
          in: path
          required: true
          schema: { type: integer }
          description: 'Job ID'
      requestBody:
        description: 'Array of tags to add'
        required: true
        content:
          'application/json':
            schema:
              type: array
              items:
                $ref: '#/components/schemas/Tag'
      responses:
        200:
          description: 'Job resource'
          content:
            'application/json':
              schema:
                $ref: '#/components/schemas/Job'
        404:
          description: 'Job or tag does not exist'
        400:
          description: 'Bad request'
  '/api/jobs/start_job/':
    post:
      operationId: 'startJob'
      summary: 'Add a newly started job'
      requestBody:
        required: true
        content:
          'application/json':
            schema:
              $ref: '#/components/schemas/Job'
      responses:
        201:
          description: 'Job successfully'
          content:
            'application/json':
              schema:
                type: object
                properties:
                  id:
                    type: integer
                    description: 'The database ID assigned to this job'
        400:
          description: 'Bad request'
        422:
          description: 'The combination of jobId, clusterId and startTime does already exist'
  '/api/jobs/stop_job/':
    post:
      operationId: stopJobViaJobID
      summary: 'Mark a job as stopped. Which job to stop is specified by the request body.'
      requestBody:
        required: true
        content:
          'application/json':
            schema:
              type: object
              required: [jobId, cluster, stopTime, jobState]
              properties:
                jobId: { type: integer }
                cluster: { type: string }
                startTime: { type: integer }
                stopTime: { type: integer }
                jobState:
                  type: string
                  enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
      responses:
        200:
          description: 'Job resource'
          content:
            'application/json':
              schema:
                $ref: '#/components/schemas/Job'
        400:
          description: 'Bad request'
        404:
          description: 'Resource not found'
  '/api/jobs/stop_job/{id}':
    post:
      operationId: 'stopJobViaDBID'
      summary: 'Mark a job as stopped.'
      parameters:
        - name: id
          in: path
          required: true
          schema: { type: integer }
          description: 'Database ID (Resource Identifier)'
      requestBody:
        required: true
        content:
          'application/json':
            schema:
              type: object
              required: [stopTime, jobState]
              properties:
                stopTime: { type: integer }
                jobState:
                  type: string
                  enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
      responses:
        200:
          description: 'Job resource'
          content:
            'application/json':
              schema:
                $ref: '#/components/schemas/Job'
        400:
          description: 'Bad request'
        404:
          description: 'Resource not found'
  '/api/jobs/import/':
    post:
      operationId: 'importJob'
      summary: 'Imports a job and its metric data'
      requestBody:
        required: true
        content:
          'application/json':
            schema:
              type: object
              properties:
                meta:
                  $ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
                data:
                  $ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-data.schema.json
      responses:
        200:
          description: 'Import successful'
        400:
          description: 'Bad request'
        422:
          description: 'Unprocessable Entity'
components:
  schemas:
    Tag:
      description: 'A job tag'
      type: object
      properties:
        id:
          type: string
          description: 'Database ID'
        type:
          type: string
          description: 'Tag type'
        name:
          type: string
          description: 'Tag name'
    Job:
      $ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      bearerFormat: JWT
security:
  - bearerAuth: [] # Applies `bearerAuth` globally
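
The hand-written OpenAPI description above is removed in favour of the generated swagger.json and swagger.yaml added below. For reference, a Go sketch of a stop_job call shaped after the deleted spec (POST /api/jobs/stop_job/ with jobId, cluster, stopTime and jobState, authenticated with a JWT bearer token); host, port, the CC_JWT variable and the example values are assumptions, and the authoritative 1.0.0 routes are the generated swagger files.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

// Request body as described by the removed hand-written spec for
// POST /api/jobs/stop_job/; field names follow that spec.
type stopJobRequest struct {
	JobID    int64  `json:"jobId"`
	Cluster  string `json:"cluster"`
	StopTime int64  `json:"stopTime"`
	JobState string `json:"jobState"`
}

func main() {
	// Example values are made up for illustration.
	payload, _ := json.Marshal(stopJobRequest{
		JobID: 12345, Cluster: "testcluster", StopTime: 1687872000, JobState: "completed",
	})
	req, _ := http.NewRequest("POST", "http://localhost:8080/api/jobs/stop_job/", bytes.NewReader(payload))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+os.Getenv("CC_JWT")) // bearerAuth per the spec

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}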

interfaces/rest/swagger.json (new file, 1408 additions): file diff suppressed because it is too large.

interfaces/rest/swagger.yaml (new file, 1006 additions): file diff suppressed because it is too large.

View File

@@ -1,49 +1,71 @@
-DROP TABLE IF EXISTS jobtag;
-DROP TABLE IF EXISTS job;
-DROP TABLE IF EXISTS tag;
-CREATE TABLE job (
-    id INTEGER PRIMARY KEY /*!40101 AUTO_INCREMENT */,
-    job_id BIGINT NOT NULL,
-    cluster VARCHAR(255) NOT NULL,
-    subcluster VARCHAR(255) NOT NULL,
-    start_time BIGINT NOT NULL, -- Unix timestamp
-    user VARCHAR(255) NOT NULL,
-    project VARCHAR(255) NOT NULL,
-    ` + "`partition`" + ` VARCHAR(255) NOT NULL, -- partition is a keyword in mysql -.-
-    array_job_id BIGINT NOT NULL,
-    duration INT NOT NULL DEFAULT 0,
-    walltime INT NOT NULL DEFAULT 0,
-    job_state VARCHAR(255) NOT NULL CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')),
-    meta_data TEXT, -- JSON
-    resources TEXT NOT NULL, -- JSON
-    num_nodes INT NOT NULL,
-    num_hwthreads INT NOT NULL,
-    num_acc INT NOT NULL,
-    smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
-    exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
-    monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
-    mem_used_max REAL NOT NULL DEFAULT 0.0,
-    flops_any_avg REAL NOT NULL DEFAULT 0.0,
-    mem_bw_avg REAL NOT NULL DEFAULT 0.0,
-    load_avg REAL NOT NULL DEFAULT 0.0,
-    net_bw_avg REAL NOT NULL DEFAULT 0.0,
-    net_data_vol_total REAL NOT NULL DEFAULT 0.0,
-    file_bw_avg REAL NOT NULL DEFAULT 0.0,
-    file_data_vol_total REAL NOT NULL DEFAULT 0.0);
 CREATE TABLE tag (
     id INTEGER PRIMARY KEY,
     tag_type VARCHAR(255) NOT NULL,
     tag_name VARCHAR(255) NOT NULL,
-    CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
+    insert_ts TEXT DEFAULT CURRENT_TIMESTAMP,
+    UNIQUE (tag_type, tag_name));
 CREATE TABLE jobtag (
     job_id INTEGER,
     tag_id INTEGER,
+    insert_ts TEXT DEFAULT CURRENT_TIMESTAMP,
     PRIMARY KEY (job_id, tag_id),
     FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
     FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
+CREATE TABLE user (
+    username varchar(255) PRIMARY KEY NOT NULL,
+    password varchar(255) DEFAULT NULL,
+    ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
+    name varchar(255) DEFAULT NULL,
+    roles varchar(255) NOT NULL DEFAULT "[]",
+    email varchar(255) DEFAULT NULL,
+    projects varchar(255) NOT NULL DEFAULT "[]");
+CREATE TABLE configuration (
+    username varchar(255),
+    confkey varchar(255),
+    value varchar(255),
+    PRIMARY KEY (username, confkey),
+    FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
+CREATE TABLE job (
+    id INTEGER PRIMARY KEY,
+    job_id BIGINT NOT NULL,
+    cluster VARCHAR(255) NOT NULL,
+    subcluster VARCHAR(255) NOT NULL,
+    start_time BIGINT NOT NULL, -- Unix timestamp
+    user VARCHAR(255) NOT NULL,
+    project VARCHAR(255) NOT NULL,
+    partition VARCHAR(255),
+    array_job_id BIGINT,
+    duration INT NOT NULL,
+    walltime INT NOT NULL,
+    job_state VARCHAR(255) NOT NULL
+        CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')),
+    meta_data TEXT, -- JSON
+    resources TEXT NOT NULL, -- JSON
+    num_nodes INT NOT NULL,
+    num_hwthreads INT,
+    num_acc INT,
+    smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
+    exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
+    monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
+    mem_used_max REAL NOT NULL DEFAULT 0.0,
+    flops_any_avg REAL NOT NULL DEFAULT 0.0,
+    mem_bw_avg REAL NOT NULL DEFAULT 0.0,
+    load_avg REAL NOT NULL DEFAULT 0.0,
+    net_bw_avg REAL NOT NULL DEFAULT 0.0,
+    net_data_vol_total REAL NOT NULL DEFAULT 0.0,
+    file_bw_avg REAL NOT NULL DEFAULT 0.0,
+    file_data_vol_total REAL NOT NULL DEFAULT 0.0,
+    UNIQUE (job_id, cluster, start_time));
+CREATE INDEX job_stats ON job (cluster,subcluster,user);
+CREATE INDEX job_by_user ON job (user);
+CREATE INDEX job_by_starttime ON job (start_time);
+CREATE INDEX job_by_job_id ON job (job_id, cluster, start_time);
+CREATE INDEX job_list ON job (cluster, job_state);
+CREATE INDEX job_list_user ON job (user, cluster, job_state);
+CREATE INDEX job_list_users ON job (user, job_state);
+CREATE INDEX job_list_users_start ON job (start_time, user, job_state);
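
The rewritten job table enforces UNIQUE (job_id, cluster, start_time) and gains covering indexes such as job_by_job_id. A Go sketch of the lookup this enables, assuming the mattn/go-sqlite3 driver and a local database file; all names and values are illustrative, not part of this commit.

package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/mattn/go-sqlite3" // driver choice is an assumption for this sketch
)

func main() {
	db, err := sql.Open("sqlite3", "cc-backend.db")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// The UNIQUE (job_id, cluster, start_time) constraint makes this lookup
	// unambiguous, and the job_by_job_id index covers it.
	var id int64
	var state string
	err = db.QueryRow(
		`SELECT id, job_state FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?`,
		12345, "testcluster", 1687872000).Scan(&id, &state)
	if err == sql.ErrNoRows {
		fmt.Println("no such job")
		return
	} else if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("database id %d, state %s\n", id, state)
}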