Regenerate Swagger, fix tests, cleanup

This commit is contained in:
2024-11-26 07:02:53 +01:00
parent adb11b3ed0
commit 28539e60b0
6 changed files with 254 additions and 482 deletions

View File

@@ -14,9 +14,9 @@ import (
"os"
"path/filepath"
"reflect"
"strconv"
"strings"
"testing"
"time"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
@@ -200,6 +200,10 @@ func TestRestApi(t *testing.T) {
r.StrictSlash(true)
restapi.MountApiRoutes(r)
var TestJobId int64 = 123
var TestClusterName string = "testcluster"
var TestStartTime int64 = 123456789
const startJobBody string = `{
"jobId": 123,
"user": "testuser",
@@ -225,7 +229,6 @@ func TestRestApi(t *testing.T) {
"startTime": 123456789
}`
var dbid int64
const contextUserKey repository.ContextKey = "user"
contextUserValue := &schema.User{
Username: "testuser",
@@ -247,13 +250,10 @@ func TestRestApi(t *testing.T) {
t.Fatal(response.Status, recorder.Body.String())
}
var res api.StartJobApiResponse
if err := json.Unmarshal(recorder.Body.Bytes(), &res); err != nil {
t.Fatal(err)
}
time.Sleep(1 * time.Second)
resolver := graph.GetResolverInstance()
job, err := resolver.Query().Job(ctx, strconv.Itoa(int(res.DBID)))
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil {
t.Fatal(err)
}
@@ -285,8 +285,6 @@ func TestRestApi(t *testing.T) {
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
t.Fatalf("unexpected tags: %#v", job.Tags)
}
dbid = res.DBID
}); !ok {
return
}
@@ -314,8 +312,7 @@ func TestRestApi(t *testing.T) {
}
archiver.WaitForArchiving()
resolver := graph.GetResolverInstance()
job, err := resolver.Query().Job(ctx, strconv.Itoa(int(dbid)))
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil {
t.Fatal(err)
}
@@ -404,8 +401,10 @@ func TestRestApi(t *testing.T) {
t.Fatal("subtest failed")
}
time.Sleep(1 * time.Second)
const stopJobBodyFailed string = `{
"jobId": 12345,
"jobId": 12345,
"cluster": "testcluster",
"jobState": "failed",

View File

@@ -601,88 +601,6 @@ const docTemplate = `{
}
}
},
"/jobs/stop_job/{id}": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to stop is specified by database ID. Only stopTime and final state are required in request body.\nReturns full job resource information according to 'JobMeta' scheme.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"Job add and modify"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
{
"type": "integer",
"description": "Database ID of Job",
"name": "id",
"in": "path",
"required": true
},
{
"description": "stopTime and final state in request body",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.StopJobApiRequest"
}
}
],
"responses": {
"200": {
"description": "Job resource",
"schema": {
"$ref": "#/definitions/schema.JobMeta"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Resource not found",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/tag_job/{id}": {
"post": {
"security": [
@@ -690,7 +608,7 @@ const docTemplate = `{
"ApiKeyAuth": []
}
],
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nTag Scope for frontend visibility will default to \"global\" if none entered, other options: \"admin\" or specific username.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
"consumes": [
"application/json"
],
@@ -1283,6 +1201,11 @@ const docTemplate = `{
"type": "string",
"example": "Testjob"
},
"scope": {
"description": "Tag Scope for Frontend Display",
"type": "string",
"example": "global"
},
"type": {
"description": "Tag Type",
"type": "string",
@@ -1410,9 +1333,8 @@ const docTemplate = `{
"api.StartJobApiResponse": {
"type": "object",
"properties": {
"id": {
"description": "Database ID of new job",
"type": "integer"
"msg": {
"type": "string"
}
}
},
@@ -1424,17 +1346,14 @@ const docTemplate = `{
],
"properties": {
"cluster": {
"description": "Cluster of job",
"type": "string",
"example": "fritz"
},
"jobId": {
"description": "Cluster Job ID of job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final job state",
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
@@ -1443,12 +1362,10 @@ const docTemplate = `{
"example": "completed"
},
"startTime": {
"description": "Start Time of job as epoch",
"type": "integer",
"example": 1649723812
},
"stopTime": {
"description": "Stop Time of job as epoch",
"type": "integer",
"example": 1649763839
}
@@ -1493,12 +1410,10 @@ const docTemplate = `{
"type": "object",
"properties": {
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer",
"example": 123000
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string",
"example": "fritz"
},
@@ -1506,33 +1421,39 @@ const docTemplate = `{
"$ref": "#/definitions/schema.JobLinkResultList"
},
"duration": {
"description": "Duration of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 43200
},
"energy": {
"type": "number"
},
"energyFootprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"exclusive": {
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"flopsAnyAvg": {
"description": "FlopsAnyAvg as Float64",
"type": "number"
"footprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"id": {
"description": "The unique identifier of a job in the database",
"type": "integer"
},
"jobId": {
"description": "The unique identifier of a job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"enum": [
"completed",
"failed",
@@ -1548,95 +1469,69 @@ const docTemplate = `{
],
"example": "completed"
},
"loadAvg": {
"description": "LoadAvg as Float64",
"type": "number"
},
"memBwAvg": {
"description": "MemBwAvg as Float64",
"type": "number"
},
"memUsedMax": {
"description": "MemUsedMax as Float64",
"type": "number"
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"monitoringStatus": {
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
"type": "integer",
"maximum": 3,
"minimum": 0,
"example": 1
},
"numAcc": {
"description": "Number of accelerators used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"numHwthreads": {
"description": "NumCores int32 ` + "`" + `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` + "`" + ` // Number of HWThreads used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 20
},
"numNodes": {
"description": "Number of nodes used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string",
"example": "main"
},
"project": {
"description": "The unique identifier of a project",
"type": "string",
"example": "abcd200"
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"smt": {
"description": "SMT threads used by job",
"type": "integer",
"example": 4
},
"startTime": {
"description": "Start time as 'time.Time' data type",
"type": "string"
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string",
"example": "main"
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Tag"
}
},
"user": {
"description": "The unique identifier of a user",
"type": "string",
"example": "abcd100h"
},
"walltime": {
"description": "Requested walltime of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 86400
@@ -1673,12 +1568,10 @@ const docTemplate = `{
"type": "object",
"properties": {
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer",
"example": 123000
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string",
"example": "fritz"
},
@@ -1686,29 +1579,39 @@ const docTemplate = `{
"$ref": "#/definitions/schema.JobLinkResultList"
},
"duration": {
"description": "Duration of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 43200
},
"energy": {
"type": "number"
},
"energyFootprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"exclusive": {
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"id": {
"description": "The unique identifier of a job in the database",
"type": "integer"
},
"jobId": {
"description": "The unique identifier of a job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"enum": [
"completed",
"failed",
@@ -1725,91 +1628,76 @@ const docTemplate = `{
"example": "completed"
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"monitoringStatus": {
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
"type": "integer",
"maximum": 3,
"minimum": 0,
"example": 1
},
"numAcc": {
"description": "Number of accelerators used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"numHwthreads": {
"description": "NumCores int32 ` + "`" + `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` + "`" + ` // Number of HWThreads used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 20
},
"numNodes": {
"description": "Number of nodes used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string",
"example": "main"
},
"project": {
"description": "The unique identifier of a project",
"type": "string",
"example": "abcd200"
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"smt": {
"description": "SMT threads used by job",
"type": "integer",
"example": 4
},
"startTime": {
"description": "Start epoch time stamp in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 1649723812
},
"statistics": {
"description": "Metric statistics of job",
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/schema.JobStatistics"
}
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string",
"example": "main"
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Tag"
}
},
"user": {
"description": "The unique identifier of a user",
"type": "string",
"example": "abcd100h"
},
"walltime": {
"description": "Requested walltime of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 86400
@@ -1898,6 +1786,15 @@ const docTemplate = `{
"caution": {
"type": "number"
},
"energy": {
"type": "string"
},
"footprint": {
"type": "string"
},
"lowerIsBetter": {
"type": "boolean"
},
"name": {
"type": "string"
},
@@ -1975,22 +1872,18 @@ const docTemplate = `{
"type": "object",
"properties": {
"accelerators": {
"description": "List of of accelerator device ids",
"type": "array",
"items": {
"type": "string"
}
},
"configuration": {
"description": "The configuration options of the node",
"type": "string"
},
"hostname": {
"description": "Name of the host (= node)",
"type": "string"
},
"hwthreads": {
"description": "List of OS processor ids",
"type": "array",
"items": {
"type": "integer"
@@ -2033,6 +1926,12 @@ const docTemplate = `{
"type": "number"
}
},
"median": {
"type": "array",
"items": {
"type": "number"
}
},
"min": {
"type": "array",
"items": {
@@ -2056,15 +1955,33 @@ const docTemplate = `{
"coresPerSocket": {
"type": "integer"
},
"energyFootprint": {
"type": "array",
"items": {
"type": "string"
}
},
"flopRateScalar": {
"$ref": "#/definitions/schema.MetricValue"
},
"flopRateSimd": {
"$ref": "#/definitions/schema.MetricValue"
},
"footprint": {
"type": "array",
"items": {
"type": "string"
}
},
"memoryBandwidth": {
"$ref": "#/definitions/schema.MetricValue"
},
"metricConfig": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.MetricConfig"
}
},
"name": {
"type": "string"
},
@@ -2094,6 +2011,15 @@ const docTemplate = `{
"caution": {
"type": "number"
},
"energy": {
"type": "string"
},
"footprint": {
"type": "string"
},
"lowerIsBetter": {
"type": "boolean"
},
"name": {
"type": "string"
},
@@ -2113,16 +2039,17 @@ const docTemplate = `{
"type": "object",
"properties": {
"id": {
"description": "The unique DB identifier of a tag",
"type": "integer"
},
"name": {
"description": "Tag Name",
"type": "string",
"example": "Testjob"
},
"scope": {
"type": "string",
"example": "global"
},
"type": {
"description": "Tag Type",
"type": "string",
"example": "Debug"
}

View File

@@ -124,8 +124,7 @@ func (api *RestApi) MountFrontendApiRoutes(r *mux.Router) {
// StartJobApiResponse model
// JSON body that the startJob handler encodes into its HTTP response.
type StartJobApiResponse struct {
// Database ID of new job
DBID int64 `json:"id"`
// Status text of the request, serialized under the JSON key "msg".
Message string `json:"msg"`
}
// DeleteJobApiResponse model
@@ -806,25 +805,10 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
repository.TriggerJobStart(repository.JobWithUser{Job: &req, User: repository.GetUserFromContext(r.Context())})
id, err := api.JobRepository.Start(&req)
if err != nil {
handleError(fmt.Errorf("insert into database failed: %w", err), http.StatusInternalServerError, rw)
return
}
for _, tag := range req.Tags {
if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
return
}
}
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusCreated)
json.NewEncoder(rw).Encode(StartJobApiResponse{
DBID: id,
Message: fmt.Sprintf("Successfully triggered job start"),
})
}

View File

@@ -6,6 +6,7 @@ package repository
import (
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -36,18 +37,30 @@ func jobStartWorker() {
break
}
jobRepo := GetJobRepository()
var id int64
id, err := jobRepo.Start(req.Job)
if err != nil {
log.Errorf("insert into database failed: %v", err)
for i := 0; i < 5; i++ {
var err error
id, err = jobRepo.Start(req.Job)
if err != nil {
log.Errorf("Attempt %d: insert into database failed: %v", i, err)
} else {
break
}
time.Sleep(1 * time.Second)
}
for _, tag := range req.Job.Tags {
if _, err := jobRepo.AddTagOrCreate(req.User, id, tag.Type, tag.Name, tag.Scope); err != nil {
if _, err := jobRepo.AddTagOrCreate(req.User, id,
tag.Type, tag.Name, tag.Scope); err != nil {
log.Errorf("adding tag to new job %d failed: %v", id, err)
}
}
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d",
id, req.Job.Cluster, req.Job.JobID, req.Job.User, req.Job.StartTime)
jobStartPending.Done()
}
}