mirror of
https://github.com/ClusterCockpit/cc-specifications.git
synced 2024-12-25 13:09:05 +01:00
Update specs
This commit is contained in:
parent
d762e3e52b
commit
a77d6e0f31
49
job-archive/README.md
Normal file
49
job-archive/README.md
Normal file
@ -0,0 +1,49 @@
|
||||
# File based archive specification for HPC jobs
|
||||
|
||||
This is a JSON-file-based exchange format for HPC job metadata and performance metric data.
|
||||
|
||||
It consists of two parts:
|
||||
* a sqlite database schema for job meta data and performance statistics
|
||||
* a json file format together with a directory hierarchy specification
|
||||
|
||||
By using an open, portable and simple specification based on files it is
|
||||
possible to exchange job performance data for research and analysis purposes as
|
||||
well as to archive job performance data robustly on disk.
|
||||
|
||||
## Directory hierarchy and file specification
|
||||
|
||||
The job archive has top-level directories named after the clusters. In every
|
||||
cluster directory there must be one file named `cluster.json` describing the
|
||||
cluster. The json schema for this file is described here. Within this directory
|
||||
a three-level directory tree is used to organize job files.
|
||||
|
||||
To manage the number of directories within a single directory a tree approach
|
||||
is used, splitting the integer job ID. The job ID is split into chunks of 1000
|
||||
each.
|
||||
|
||||
For a 2 layer schema this can be achieved with (code example in Perl):
|
||||
|
||||
```perl
|
||||
# Split the integer job ID into chunks of 1000 to build the directory path.
# int() makes the truncation explicit, so $level1 is a whole number
# (1034871 -> 1034) instead of a fraction that only %d happens to truncate.
$level1 = int($jobID / 1000);
# Remainder gives the last three digits (1034871 -> 871); zero-padded below.
$level2 = $jobID % 1000;
$dstPath = sprintf("%s/%s/%d/%03d", $trunk, $destdir, $level1, $level2);
|
||||
|
||||
```
|
||||
|
||||
The last directory level is the unix epoch timestamp in seconds to allow for
|
||||
overflowing job ids.
|
||||
|
||||
Example:
|
||||
|
||||
For the job ID 1034871 the directory path is ./1034/871/<timestamp>/.
|
||||
|
||||
The job data consists of two files:
|
||||
|
||||
* meta.json: Contains job meta information and job statistics.
|
||||
* data.json: Contains complete job data with time series
|
||||
|
||||
The description of the json format specification is available as json schema.
|
||||
|
||||
Metric time series data is stored with fixed time step. The time step can be
|
||||
set per metric. If no value is available for a metric time series data
|
||||
timestamp, `null` must be entered.
|
@ -0,0 +1,4 @@
|
||||
## SQL Database Schema for Job Table
|
||||
|
||||
This SQLite schema for an HPC job table is used in cc-backend and is also part of
|
||||
the ClusterCockpit Job Archive specification.
|
@ -1,10 +1,49 @@
|
||||
-- Job table (compact variant): surrogate key, job identification,
-- timing/state, allocated resources and aggregated metric columns.
CREATE TABLE job (
    id            INTEGER PRIMARY KEY,

    -- identification
    job_id        TEXT    NOT NULL,
    user_id       TEXT    NOT NULL,
    project_id    TEXT    NOT NULL,
    cluster_id    TEXT    NOT NULL,

    -- timing and state (walltime and job_state are optional)
    start_time    INTEGER NOT NULL,
    duration      INTEGER NOT NULL,
    walltime      INTEGER,
    job_state     TEXT,

    -- allocated resources
    num_nodes     INTEGER NOT NULL,
    node_list     TEXT    NOT NULL,
    has_profile   INTEGER NOT NULL,

    -- aggregated metric values (all optional)
    mem_used_max  REAL,
    flops_any_avg REAL,
    mem_bw_avg    REAL,
    load_avg      REAL,
    net_bw_avg    REAL,
    file_bw_avg   REAL
);
|
||||
-- Tag table (compact variant): a tag is a type/name pair with a surrogate key.
CREATE TABLE tag (
    id       INTEGER PRIMARY KEY,
    tag_type TEXT,
    tag_name TEXT
);
|
||||
-- Link table (compact variant) connecting jobs and tags (many-to-many).
-- Deleting a job or a tag removes its links; key updates take no action.
CREATE TABLE jobtag (
    job_id INTEGER,
    tag_id INTEGER,

    PRIMARY KEY (job_id, tag_id),

    FOREIGN KEY (job_id) REFERENCES job (id)
        ON DELETE CASCADE
        ON UPDATE NO ACTION,
    FOREIGN KEY (tag_id) REFERENCES tag (id)
        ON DELETE CASCADE
        ON UPDATE NO ACTION
);
|
||||
-- Remove any existing tables so the schema can be created from scratch.
-- jobtag goes first because it holds foreign keys into job and tag.
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tag;
|
||||
|
||||
-- Job table: job identification, scheduling state, allocated resources
-- and aggregated metric columns.
CREATE TABLE job (
    -- MySQL executes the versioned comment and adds AUTO_INCREMENT;
    -- other engines (e.g. SQLite) treat it as an ordinary comment.
    id                  INTEGER PRIMARY KEY /*!40101 AUTO_INCREMENT */,
    job_id              BIGINT NOT NULL,
    cluster             VARCHAR(255) NOT NULL,
    subcluster          VARCHAR(255) NOT NULL,
    start_time          BIGINT NOT NULL, -- Unix timestamp

    user                VARCHAR(255) NOT NULL,
    project             VARCHAR(255) NOT NULL,
    -- partition is a keyword in MySQL, so the identifier must be quoted.
    -- Backticks are accepted by both MySQL and SQLite.
    `partition`         VARCHAR(255) NOT NULL,
    array_job_id        BIGINT NOT NULL,
    duration            INT NOT NULL DEFAULT 0,
    walltime            INT NOT NULL DEFAULT 0,
    job_state           VARCHAR(255) NOT NULL CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')),
    meta_data           TEXT,          -- JSON
    resources           TEXT NOT NULL, -- JSON

    num_nodes           INT NOT NULL,
    num_hwthreads       INT NOT NULL,
    num_acc             INT NOT NULL,
    -- NOTE(review): the meaning of the numeric codes below is not defined
    -- in this schema; confirm against the application before relying on them.
    smt                 TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
    exclusive           TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
    monitoring_status   TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),

    -- Aggregated metric values; default to 0.0 when not provided.
    mem_used_max        REAL NOT NULL DEFAULT 0.0,
    flops_any_avg       REAL NOT NULL DEFAULT 0.0,
    mem_bw_avg          REAL NOT NULL DEFAULT 0.0,
    load_avg            REAL NOT NULL DEFAULT 0.0,
    net_bw_avg          REAL NOT NULL DEFAULT 0.0,
    net_data_vol_total  REAL NOT NULL DEFAULT 0.0,
    file_bw_avg         REAL NOT NULL DEFAULT 0.0,
    file_data_vol_total REAL NOT NULL DEFAULT 0.0);
|
||||
|
||||
-- Tag table: a tag is a (tag_type, tag_name) pair; each pair may be
-- stored only once.
CREATE TABLE tag (
    id       INTEGER      PRIMARY KEY,
    tag_type VARCHAR(255) NOT NULL,
    tag_name VARCHAR(255) NOT NULL,

    CONSTRAINT be_unique
        UNIQUE (tag_type, tag_name)
);
|
||||
|
||||
-- Link table connecting jobs and tags (many-to-many).
-- Deleting a job or a tag also deletes its links (ON DELETE CASCADE).
CREATE TABLE jobtag (
    -- NOT NULL is spelled out because SQLite does not imply it for PRIMARY KEY
    -- columns of ordinary (rowid) tables; without it NULL links are accepted.
    job_id INTEGER NOT NULL,
    tag_id INTEGER NOT NULL,
    PRIMARY KEY (job_id, tag_id),
    FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
    FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
|
||||
|
Loading…
Reference in New Issue
Block a user