mirror of
https://github.com/ClusterCockpit/cc-specifications.git
synced 2024-12-26 05:19:05 +01:00
Update specs
This commit is contained in:
parent
d762e3e52b
commit
a77d6e0f31
49
job-archive/README.md
Normal file
49
job-archive/README.md
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# File based archive specification for HPC jobs
|
||||||
|
|
||||||
|
This is a JSON-file-based exchange format for HPC job meta data and performance metric data.
|
||||||
|
|
||||||
|
It consists of two parts:
|
||||||
|
* a sqlite database schema for job meta data and performance statistics
|
||||||
|
* a json file format together with a directory hierarchy specification
|
||||||
|
|
||||||
|
By using an open, portable and simple specification based on files it is
|
||||||
|
possible to exchange job performance data for research and analysis purposes as
|
||||||
|
well as a robust way for archiving job performance data on disk.
|
||||||
|
|
||||||
|
## Directory hierarchy and file specification
|
||||||
|
|
||||||
|
The job archive has top-level directories named after the clusters. In every
|
||||||
|
cluster directory there must be one file named `cluster.json` describing the
|
||||||
|
cluster. The json schema for this file is described here. Within this directory
|
||||||
|
a three-level directory tree is used to organize job files.
|
||||||
|
|
||||||
|
To manage the number of directories within a single directory a tree approach
|
||||||
|
is used, splitting the integer job ID. The job ID is split in chunks of 1000
|
||||||
|
each.
|
||||||
|
|
||||||
|
For a 2 layer schema this can be achieved with (code example in Perl):
|
||||||
|
|
||||||
|
```perl
|
||||||
|
$level1 = $jobID/1000;
|
||||||
|
$level2 = $jobID%1000;
|
||||||
|
$dstPath = sprintf("%s/%s/%d/%03d", $trunk, $destdir, $level1, $level2);
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
The last directory level is the unix epoch timestamp in seconds to allow for
|
||||||
|
overflowing job ids.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
For the job ID 1034871 the directory path is ./1034/871/<timestamp>/.
|
||||||
|
|
||||||
|
The job data consists of two files:
|
||||||
|
|
||||||
|
* meta.json: Contains job meta information and job statistics.
|
||||||
|
* data.json: Contains complete job data with time series
|
||||||
|
|
||||||
|
The description of the json format specification is available as json schema.
|
||||||
|
|
||||||
|
Metric time series data is stored with fixed time step. The time step can be
|
||||||
|
set per metric. If no value is available for a metric time series data
|
||||||
|
timestamp, null must be entered.
|
@ -0,0 +1,4 @@
|
|||||||
|
## SQL Database Schema for Job Table
|
||||||
|
|
||||||
|
This sqlite schema for an HPC job table is used in cc-backend and also part of
|
||||||
|
the ClusterCockpit Job Archive specification.
|
@ -1,10 +1,49 @@
|
|||||||
-- Rebuild the schema from scratch: drop the dependent link table first
-- (jobtag references both job and tag), then the referenced tables.
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS tag;

-- One row per HPC job: scheduler metadata plus aggregated performance
-- statistics (averages / maxima over the whole job runtime).
CREATE TABLE job (
    id INTEGER PRIMARY KEY /*!40101 AUTO_INCREMENT */, -- surrogate key; MySQL auto-increment hint kept for portability
    job_id BIGINT NOT NULL,            -- scheduler-assigned job id (may repeat after an id overflow)
    cluster VARCHAR(255) NOT NULL,
    subcluster VARCHAR(255) NOT NULL,
    start_time BIGINT NOT NULL,        -- Unix timestamp in seconds

    user VARCHAR(255) NOT NULL,
    project VARCHAR(255) NOT NULL,
    `partition` VARCHAR(255) NOT NULL, -- quoted: PARTITION is a reserved keyword in MySQL
    array_job_id BIGINT NOT NULL,
    duration INT NOT NULL DEFAULT 0,   -- job runtime in seconds
    walltime INT NOT NULL DEFAULT 0,   -- requested walltime in seconds
    job_state VARCHAR(255) NOT NULL
        CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
                            'stopped', 'timeout', 'preempted', 'out_of_memory')),
    meta_data TEXT,                    -- JSON
    resources TEXT NOT NULL,           -- JSON

    num_nodes INT NOT NULL,
    num_hwthreads INT NOT NULL,
    num_acc INT NOT NULL,              -- number of accelerators
    smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1)),
    exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
    monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),

    -- Aggregated performance statistics for the job.
    mem_used_max REAL NOT NULL DEFAULT 0.0,
    flops_any_avg REAL NOT NULL DEFAULT 0.0,
    mem_bw_avg REAL NOT NULL DEFAULT 0.0,
    load_avg REAL NOT NULL DEFAULT 0.0,
    net_bw_avg REAL NOT NULL DEFAULT 0.0,
    net_data_vol_total REAL NOT NULL DEFAULT 0.0,
    file_bw_avg REAL NOT NULL DEFAULT 0.0,
    file_data_vol_total REAL NOT NULL DEFAULT 0.0);
|
||||||
|
|
||||||
|
-- Free-form job tags; each (tag_type, tag_name) pair may exist only once.
CREATE TABLE tag (
    id INTEGER PRIMARY KEY,
    tag_type VARCHAR(255) NOT NULL,
    tag_name VARCHAR(255) NOT NULL,
    CONSTRAINT be_unique UNIQUE (tag_type, tag_name));
|
||||||
|
|
||||||
|
-- Many-to-many link between jobs and tags. Link rows are removed
-- automatically when the referenced job or tag is deleted (ON DELETE CASCADE;
-- note: SQLite only enforces this with PRAGMA foreign_keys = ON).
CREATE TABLE jobtag (
    job_id INTEGER,
    tag_id INTEGER,
    PRIMARY KEY (job_id, tag_id),
    FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
    FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
|
||||||
|
Loading…
Reference in New Issue
Block a user