Merge branch 'dev' into optimize-checkpoint-loading

This commit is contained in:
2026-02-23 14:21:32 +01:00
7 changed files with 260 additions and 73 deletions

View File

@@ -89,7 +89,7 @@ For release specific notes visit the [ClusterCockpit Documentation](https://clus
- **Job tagger option**: Enable automatic job tagging via configuration flag - **Job tagger option**: Enable automatic job tagging via configuration flag
- **Application detection**: Automatic detection of applications (MATLAB, GROMACS, etc.) - **Application detection**: Automatic detection of applications (MATLAB, GROMACS, etc.)
- **Job classification**: Automatic detection of pathological jobs - **Job classification**: Automatic detection of pathological jobs
- **omitTagged flag**: Option to exclude tagged jobs from retention/cleanup operations - **omit-tagged**: Option to exclude tagged jobs from retention/cleanup operations (`none`, `all`, or `user`)
- **Admin UI trigger**: Taggers can be run on-demand from the admin web interface - **Admin UI trigger**: Taggers can be run on-demand from the admin web interface
without restarting the backend without restarting the backend

View File

@@ -389,8 +389,71 @@
] ]
} }
}, },
"/api/jobs/edit_meta/": {
"patch": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Edit key value pairs in metadata json of job specified by jobID, StartTime and Cluster\nIf a key already exists its content will be overwritten",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"Job add and modify"
],
"summary": "Edit meta-data json by request",
"parameters": [
{
"description": "Specifies job and payload to add or update",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.JobMetaRequest"
}
}
],
"responses": {
"200": {
"description": "Updated job resource",
"schema": {
"$ref": "#/definitions/schema.Job"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Job does not exist",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/api/jobs/edit_meta/{id}": { "/api/jobs/edit_meta/{id}": {
"post": { "patch": {
"description": "Edit key value pairs in job metadata json\nIf a key already exists its content will be overwritten", "description": "Edit key value pairs in job metadata json\nIf a key already exists its content will be overwritten",
"consumes": [ "consumes": [
"application/json" "application/json"

View File

@@ -102,6 +102,27 @@ definitions:
description: Page id returned description: Page id returned
type: integer type: integer
type: object type: object
api.JobMetaRequest:
properties:
cluster:
description: Cluster of job
example: fritz
type: string
jobId:
description: Cluster Job ID of job
example: 123000
type: integer
payload:
allOf:
- $ref: '#/definitions/api.EditMetaRequest'
description: Content to Add to Job Meta_Data
startTime:
description: Start Time of job as epoch
example: 1649723812
type: integer
required:
- jobId
type: object
api.JobMetricWithName: api.JobMetricWithName:
properties: properties:
metric: metric:
@@ -1091,8 +1112,50 @@ paths:
summary: Remove a job from the sql database summary: Remove a job from the sql database
tags: tags:
- Job remove - Job remove
/api/jobs/edit_meta/:
patch:
consumes:
- application/json
description: |-
Edit key value pairs in metadata json of job specified by jobID, StartTime and Cluster
If a key already exists its content will be overwritten
parameters:
- description: Specifies job and payload to add or update
in: body
name: request
required: true
schema:
$ref: '#/definitions/api.JobMetaRequest'
produces:
- application/json
responses:
"200":
description: Updated job resource
schema:
$ref: '#/definitions/schema.Job'
"400":
description: Bad Request
schema:
$ref: '#/definitions/api.ErrorResponse'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/api.ErrorResponse'
"404":
description: Job does not exist
schema:
$ref: '#/definitions/api.ErrorResponse'
"500":
description: Internal Server Error
schema:
$ref: '#/definitions/api.ErrorResponse'
security:
- ApiKeyAuth: []
summary: Edit meta-data json by request
tags:
- Job add and modify
/api/jobs/edit_meta/{id}: /api/jobs/edit_meta/{id}:
post: patch:
consumes: consumes:
- application/json - application/json
description: |- description: |-

View File

@@ -396,8 +396,71 @@ const docTemplate = `{
] ]
} }
}, },
"/api/jobs/edit_meta/": {
"patch": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Edit key value pairs in metadata json of job specified by jobID, StartTime and Cluster\nIf a key already exists its content will be overwritten",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"Job add and modify"
],
"summary": "Edit meta-data json by request",
"parameters": [
{
"description": "Specifies job and payload to add or update",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.JobMetaRequest"
}
}
],
"responses": {
"200": {
"description": "Updated job resource",
"schema": {
"$ref": "#/definitions/schema.Job"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Job does not exist",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/api/jobs/edit_meta/{id}": { "/api/jobs/edit_meta/{id}": {
"post": { "patch": {
"description": "Edit key value pairs in job metadata json\nIf a key already exists its content will be overwritten", "description": "Edit key value pairs in job metadata json\nIf a key already exists its content will be overwritten",
"consumes": [ "consumes": [
"application/json" "application/json"

View File

@@ -72,6 +72,14 @@ type EditMetaRequest struct {
Value string `json:"value" example:"bash script"` Value string `json:"value" example:"bash script"`
} }
// JobMetaRequest model
//
// Request body of the PATCH /api/jobs/edit_meta/ endpoint (editMetaByRequest):
// it names the job to modify and carries the metadata key/value pair to store.
// JobId is mandatory; the handler answers 400 Bad Request when it is missing.
// Cluster and StartTime are optional pointers that are handed unchanged to
// JobRepository.Find (presumably to narrow the lookup when a cluster job id is
// not unique on its own; confirm against Find).
type JobMetaRequest struct {
JobId *int64 `json:"jobId" validate:"required" example:"123000"` // Cluster Job ID of job
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
Payload EditMetaRequest `json:"payload"` // Content to Add to Job Meta_Data
}
type TagJobAPIRequest []*APITag type TagJobAPIRequest []*APITag
type GetJobAPIRequest []string type GetJobAPIRequest []string
@@ -423,21 +431,21 @@ func (api *RestAPI) getJobByID(rw http.ResponseWriter, r *http.Request) {
} }
// editMeta godoc // editMeta godoc
// @summary Edit meta-data json // @summary Edit meta-data json of job identified by database id
// @tags Job add and modify // @tags Job add and modify
// @description Edit key value pairs in job metadata json // @description Edit key value pairs in job metadata json of job specified by database id
// @description If a key already exists its content will be overwritten // @description If a key already exists its content will be overwritten
// @accept json // @accept json
// @produce json // @produce json
// @param id path int true "Job Database ID" // @param id path int true "Job Database ID"
// @param request body api.EditMetaRequest true "Kay value pair to add" // @param request body api.EditMetaRequest true "Metadata Key value pair to add or update"
// @success 200 {object} schema.Job "Updated job resource" // @success 200 {object} schema.Job "Updated job resource"
// @failure 400 {object} api.ErrorResponse "Bad Request" // @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized" // @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 404 {object} api.ErrorResponse "Job does not exist" // @failure 404 {object} api.ErrorResponse "Job does not exist"
// @failure 500 {object} api.ErrorResponse "Internal Server Error" // @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth // @security ApiKeyAuth
// @router /api/jobs/edit_meta/{id} [post] // @router /api/jobs/edit_meta/{id} [patch]
func (api *RestAPI) editMeta(rw http.ResponseWriter, r *http.Request) { func (api *RestAPI) editMeta(rw http.ResponseWriter, r *http.Request) {
id, err := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64) id, err := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
if err != nil { if err != nil {
@@ -469,6 +477,54 @@ func (api *RestAPI) editMeta(rw http.ResponseWriter, r *http.Request) {
} }
} }
// editMetaByRequest godoc
// @summary Edit meta-data json of job identified by request
// @tags Job add and modify
// @description Edit key value pairs in metadata json of job specified by jobID, StartTime and Cluster
// @description If a key already exists its content will be overwritten
// @accept json
// @produce json
// @param request body api.JobMetaRequest true "Specifies job and payload to add or update"
// @success 200 {object} schema.Job "Updated job resource"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 404 {object} api.ErrorResponse "Job does not exist"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /api/jobs/edit_meta/ [patch]
func (api *RestAPI) editMetaByRequest(rw http.ResponseWriter, r *http.Request) {
	// Decode the request body into the job selector and the key/value payload.
	var req JobMetaRequest
	if err := decode(r.Body, &req); err != nil {
		handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
		return
	}

	// jobId is the only selector checked here; cluster and startTime are
	// forwarded to the repository exactly as they were received.
	if req.JobId == nil {
		handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
		return
	}

	// Fetch the job (that will have its meta_data edited) from the db.
	// NOTE(review): every lookup failure is answered with 422 Unprocessable
	// Entity, whereas the godoc above advertises 404 for a missing job.
	// Confirm which status clients should rely on before changing either side.
	job, err := api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
	if err != nil {
		handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
		return
	}

	// Report update failures through handleError like every other failure in
	// this handler, instead of the plain-text body produced by http.Error.
	if err := api.JobRepository.UpdateMetadata(job, req.Payload.Key, req.Payload.Value); err != nil {
		handleError(fmt.Errorf("updating job metadata failed: %w", err), http.StatusInternalServerError, rw)
		return
	}

	// Answer with the job object that was passed to UpdateMetadata. Once the
	// status line has been written an encoding error can no longer be turned
	// into an error response, so the encoder result is not inspected.
	rw.Header().Set("Content-Type", "application/json")
	rw.WriteHeader(http.StatusOK)
	json.NewEncoder(rw).Encode(job)
}
// tagJob godoc // tagJob godoc
// @summary Adds one or more tags to a job // @summary Adds one or more tags to a job
// @tags Job add and modify // @tags Job add and modify

View File

@@ -96,8 +96,8 @@ func (api *RestAPI) MountAPIRoutes(r chi.Router) {
r.Post("/jobs/tag_job/{id}", api.tagJob) r.Post("/jobs/tag_job/{id}", api.tagJob)
r.Patch("/jobs/tag_job/{id}", api.tagJob) r.Patch("/jobs/tag_job/{id}", api.tagJob)
r.Delete("/jobs/tag_job/{id}", api.removeTagJob) r.Delete("/jobs/tag_job/{id}", api.removeTagJob)
r.Post("/jobs/edit_meta/{id}", api.editMeta)
r.Patch("/jobs/edit_meta/{id}", api.editMeta) r.Patch("/jobs/edit_meta/{id}", api.editMeta)
r.Patch("/jobs/edit_meta/", api.editMetaByRequest)
r.Get("/jobs/metrics/{id}", api.getJobMetrics) r.Get("/jobs/metrics/{id}", api.getJobMetrics)
r.Delete("/jobs/delete_job/", api.deleteJobByRequest) r.Delete("/jobs/delete_job/", api.deleteJobByRequest)
r.Delete("/jobs/delete_job/{id}", api.deleteJobByID) r.Delete("/jobs/delete_job/{id}", api.deleteJobByID)

View File

@@ -32,15 +32,6 @@
/* Const Init */ /* Const Init */
const client = getContextClient(); const client = getContextClient();
const stateOptions = [
"all",
"allocated",
"idle",
"down",
"mixed",
"reserved",
"unknown",
];
const healthOptions = [ const healthOptions = [
"all", "all",
"full", "full",
@@ -52,12 +43,10 @@
let pieWidth = $state(0); let pieWidth = $state(0);
let querySorting = $state({ field: "startTime", type: "col", order: "DESC" }) let querySorting = $state({ field: "startTime", type: "col", order: "DESC" })
let tableHostFilter = $state(""); let tableHostFilter = $state("");
let tableStateFilter = $state(stateOptions[0]);
let tableHealthFilter = $state(healthOptions[0]); let tableHealthFilter = $state(healthOptions[0]);
let healthTableSorting = $state( let healthTableSorting = $state(
{ {
schedulerState: { dir: "down", active: true }, healthState: { dir: "up", active: true },
healthState: { dir: "down", active: false },
hostname: { dir: "down", active: false }, hostname: { dir: "down", active: false },
} }
); );
@@ -79,9 +68,7 @@
hostname hostname
cluster cluster
subCluster subCluster
schedulerState
healthState healthState
metaData
healthData healthData
} }
} }
@@ -102,7 +89,7 @@
let healthTableData = $derived.by(() => { let healthTableData = $derived.by(() => {
if ($statusQuery?.data) { if ($statusQuery?.data) {
return [...$statusQuery.data.nodes.items].sort((n1, n2) => { return [...$statusQuery.data.nodes.items].sort((n1, n2) => {
return n1['schedulerState'].localeCompare(n2['schedulerState']) return n1['healthState'].localeCompare(n2['healthState'])
}); });
} else { } else {
return []; return [];
@@ -114,21 +101,12 @@
if (tableHostFilter != "") { if (tableHostFilter != "") {
pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter)) pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter))
} }
if (tableStateFilter != "all") {
pendingTableData = pendingTableData.filter((e) => e.schedulerState.includes(tableStateFilter))
}
if (tableHealthFilter != "all") { if (tableHealthFilter != "all") {
pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter)) pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter))
} }
return pendingTableData return pendingTableData
}); });
const refinedStateData = $derived.by(() => {
return $statusQuery?.data?.nodeStates.
filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)).
sort((a, b) => b.count - a.count)
});
const refinedHealthData = $derived.by(() => { const refinedHealthData = $derived.by(() => {
return $statusQuery?.data?.nodeStates. return $statusQuery?.data?.nodeStates.
filter((e) => ['full', 'partial', 'failed'].includes(e.state)). filter((e) => ['full', 'partial', 'failed'].includes(e.state)).
@@ -296,7 +274,7 @@
<thead> <thead>
<!-- Header Row 1: Titles and Sorting --> <!-- Header Row 1: Titles and Sorting -->
<tr> <tr>
<th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('hostname')}> <th style="width: 10%; min-width: 100px; max-width:12%;" onclick={() => sortBy('hostname')}>
Hosts ({filteredTableData.length}) Hosts ({filteredTableData.length})
<Icon <Icon
name="caret-{healthTableSorting['hostname'].dir}{healthTableSorting['hostname'] name="caret-{healthTableSorting['hostname'].dir}{healthTableSorting['hostname']
@@ -305,16 +283,7 @@
: ''}" : ''}"
/> />
</th> </th>
<th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('schedulerState')}> <th style="width: 10%; min-width: 100px; max-width:12%;" onclick={() => sortBy('healthState')}>
Scheduler State
<Icon
name="caret-{healthTableSorting['schedulerState'].dir}{healthTableSorting['schedulerState']
.active
? '-fill'
: ''}"
/>
</th>
<th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('healthState')}>
Health State Health State
<Icon <Icon
name="caret-{healthTableSorting['healthState'].dir}{healthTableSorting['healthState'] name="caret-{healthTableSorting['healthState'].dir}{healthTableSorting['healthState']
@@ -324,7 +293,6 @@
/> />
</th> </th>
<th>Metric Availability</th> <th>Metric Availability</th>
<th>Meta Information</th>
</tr> </tr>
<!-- Header Row 2: Filters --> <!-- Header Row 2: Filters -->
<tr> <tr>
@@ -337,53 +305,27 @@
</InputGroup> </InputGroup>
</th> </th>
<th> <th>
<InputGroup size="sm"> <Input size="sm" type="select" bind:value={tableHealthFilter}>
<Input type="select" bind:value={tableStateFilter}>
{#each stateOptions as so}
<option value={so}>{so}</option>
{/each}
</Input>
<InputGroupText>
<Icon name="search"></Icon>
</InputGroupText>
</InputGroup>
</th>
<th>
<InputGroup size="sm">
<Input type="select" bind:value={tableHealthFilter}>
{#each healthOptions as ho} {#each healthOptions as ho}
<option value={ho}>{ho}</option> <option value={ho}>{ho}</option>
{/each} {/each}
</Input> </Input>
<InputGroupText>
<Icon name="search"></Icon>
</InputGroupText>
</InputGroup>
</th> </th>
<th></th> <th></th>
<th></th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{#each filteredTableData as host (host.hostname)} {#each filteredTableData as host (host.hostname)}
<tr> <tr>
<th scope="row"><b><a href="/monitoring/node/{cluster}/{host.hostname}" target="_blank">{host.hostname}</a></b></th> <th scope="row"><b><a href="/monitoring/node/{cluster}/{host.hostname}" target="_blank">{host.hostname}</a></b></th>
<td>{host.schedulerState}</td>
<td>{host.healthState}</td> <td>{host.healthState}</td>
<td style="max-width: 250px;"> <td style="max-width: 76%;">
{#each Object.keys(host.healthData) as hkey} {#each Object.keys(host.healthData) as hkey}
<p> <p>
<b>{hkey}</b>: {host.healthData[hkey]} <b>{hkey}</b>: {host.healthData[hkey]}
</p> </p>
{/each} {/each}
</td> </td>
<td style="max-width: 250px;">
{#each Object.keys(host.metaData) as mkey}
<p>
<b>{mkey}</b>: {host.metaData[mkey]}
</p>
{/each}
</td>
</tr> </tr>
{/each} {/each}
</tbody> </tbody>