mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2024-12-26 13:29:05 +01:00
Add async archiving option; Move REST-API to new package
This commit is contained in:
parent
9c5c8a05e2
commit
7fcc39a144
@ -12,7 +12,7 @@ git clone --recursive git@github.com:ClusterCockpit/cc-jobarchive.git
|
|||||||
cd ./cc-jobarchive/frontend
|
cd ./cc-jobarchive/frontend
|
||||||
yarn install
|
yarn install
|
||||||
export CCFRONTEND_ROLLUP_INTRO='
|
export CCFRONTEND_ROLLUP_INTRO='
|
||||||
const JOBVIEW_URL = job => `/monitoring/job/${job.jobId}`;
|
const JOBVIEW_URL = job => `/monitoring/job/${job.id}`;
|
||||||
const USERVIEW_URL = userId => `/monitoring/user/${userId}`;
|
const USERVIEW_URL = userId => `/monitoring/user/${userId}`;
|
||||||
const TAG_URL = tag => `/monitoring/jobs/?tag=${tag.id}`;
|
const TAG_URL = tag => `/monitoring/jobs/?tag=${tag.id}`;
|
||||||
'
|
'
|
||||||
@ -52,4 +52,3 @@ This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL
|
|||||||
- [ ] Write more TODOs
|
- [ ] Write more TODOs
|
||||||
- [ ] Caching
|
- [ ] Caching
|
||||||
- [ ] Generate JWTs based on the provided keys
|
- [ ] Generate JWTs based on the provided keys
|
||||||
|
|
||||||
|
276
api/rest.go
Normal file
276
api/rest.go
Normal file
@ -0,0 +1,276 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
"github.com/jmoiron/sqlx"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RestApi bundles the dependencies shared by all REST endpoint handlers.
type RestApi struct {
	// DB is the job database handle (SQLite via sqlx in this project).
	DB *sqlx.DB

	// Resolver is reused so REST handlers share the GraphQL data-access code.
	Resolver *graph.Resolver

	// AsyncArchiving: if true, /api/jobs/stop_job/ responds immediately and
	// archives the job's metric data in a background goroutine.
	AsyncArchiving bool
}
|
||||||
|
|
||||||
|
// MountRoutes registers all REST endpoints on the given router.
// The router is expected to already carry any authentication middleware.
func (api *RestApi) MountRoutes(r *mux.Router) {
	r.HandleFunc("/api/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
	r.HandleFunc("/api/jobs/stop_job/", api.stopJob).Methods(http.MethodPost, http.MethodPut)
	// Variant with the database id in the path; stopJob handles both cases.
	r.HandleFunc("/api/jobs/stop_job/{id}", api.stopJob).Methods(http.MethodPost, http.MethodPut)

	r.HandleFunc("/api/jobs/{id}", api.getJob).Methods(http.MethodGet)
	r.HandleFunc("/api/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch)
}
|
||||||
|
|
||||||
|
// StartJobApiRequest is the JSON payload of POST/PUT /api/jobs/start_job/.
type StartJobApiRequest struct {
	JobId     int64  `json:"jobId"`     // batch-system job id (not the database id)
	UserId    string `json:"userId"`    // required; request is rejected if empty
	ClusterId string `json:"clusterId"` // must match a configured cluster
	StartTime int64  `json:"startTime"` // unix timestamp (seconds)
	MetaData  string `json:"metaData"`  // stored verbatim in the job's metadata column
	ProjectId string `json:"projectId"`

	// Either Nodes or NodeList must be set. NodeList is split on '|',
	// falling back to ',' — see startJob.
	Nodes    []string `json:"nodes"`
	NodeList string   `json:"nodeList"`
}
|
||||||
|
|
||||||
|
// StartJobApiRespone is the success response of the start_job endpoint.
// NOTE(review): "Respone" is a typo for "Response", but the name is exported
// and referenced elsewhere, so renaming would be a breaking change.
type StartJobApiRespone struct {
	// DBID is the database id (row id) of the newly inserted job.
	DBID int64 `json:"id"`
}
|
||||||
|
|
||||||
|
// StopJobApiRequest is the JSON payload of the stop_job endpoint.
type StopJobApiRequest struct {
	// JobId, ClusterId and StartTime are optional.
	// They are only used if no database id was provided.
	JobId     *string `json:"jobId"`
	ClusterId *string `json:"clusterId"`
	StartTime *int64  `json:"startTime"`

	// Payload
	// StopTime must be strictly larger than the job's start time.
	StopTime int64 `json:"stopTime"`
}
|
||||||
|
|
||||||
|
// StopJobApiRespone is the response of the stop_job endpoint when
// asynchronous archiving is enabled (the sync path returns the full job).
// NOTE(review): "Respone" is a typo kept for compatibility with callers.
type StopJobApiRespone struct {
	// DBID is the database id of the stopped job.
	DBID string `json:"id"`
}
|
||||||
|
|
||||||
|
// TagJobApiRequest is the payload of the tag_job endpoint: a list of tags
// (by type and name) to attach to a job. Each tag must already exist in the
// `tag` table — tagJob does not create new tags.
type TagJobApiRequest []*struct {
	Name string `json:"name"`
	Type string `json:"type"`
}
|
||||||
|
|
||||||
|
func (api *RestApi) getJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id := mux.Vars(r)["id"]
|
||||||
|
|
||||||
|
job, err := api.Resolver.Query().Job(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.Resolver.Job().Tags(r.Context(), job)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tagJob handles POST/PATCH /api/jobs/tag_job/{id}: it attaches a list of
// existing tags (TagJobApiRequest) to the job with the given database id and
// responds with the updated job including all of its tags.
//
// Responds 404 if the job or one of the named tags does not exist, 400 on a
// malformed body, 500 on database errors. Tags attached before a failing one
// are NOT rolled back (each insert is its own statement, no transaction).
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
	id := mux.Vars(r)["id"]
	job, err := api.Resolver.Query().Job(r.Context(), id)
	if err != nil {
		http.Error(rw, err.Error(), http.StatusNotFound)
		return
	}

	// Load the tags already attached so the response contains old and new ones.
	job.Tags, err = api.Resolver.Job().Tags(r.Context(), job)
	if err != nil {
		http.Error(rw, err.Error(), http.StatusInternalServerError)
		return
	}

	var req TagJobApiRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(rw, err.Error(), http.StatusBadRequest)
		return
	}

	for _, tag := range req {
		// Look up the tag's id by (type, name); tags are never created here.
		var tagId string
		if err := sq.Select("id").From("tag").
			Where("tag.tag_type = ?", tag.Type).Where("tag.tag_name = ?", tag.Name).
			RunWith(api.DB).QueryRow().Scan(&tagId); err != nil {
			http.Error(rw, fmt.Sprintf("the tag '%s:%s' does not exist", tag.Type, tag.Name), http.StatusNotFound)
			return
		}

		// Link job and tag via the jobtag join table.
		if _, err := api.DB.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, job.ID, tagId); err != nil {
			http.Error(rw, err.Error(), http.StatusInternalServerError)
			return
		}

		job.Tags = append(job.Tags, &model.JobTag{
			ID:      tagId,
			TagType: tag.Type,
			TagName: tag.Name,
		})
	}

	rw.Header().Add("Content-Type", "application/json")
	rw.WriteHeader(http.StatusOK)
	json.NewEncoder(rw).Encode(job)
}
|
||||||
|
|
||||||
|
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
req := StartJobApiRequest{}
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.GetClusterConfig(req.ClusterId) == nil {
|
||||||
|
http.Error(rw, fmt.Sprintf("cluster '%s' does not exist", req.ClusterId), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if req.Nodes == nil {
|
||||||
|
req.Nodes = strings.Split(req.NodeList, "|")
|
||||||
|
if len(req.Nodes) == 1 {
|
||||||
|
req.Nodes = strings.Split(req.NodeList, ",")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(req.Nodes) == 0 || len(req.Nodes[0]) == 0 || len(req.UserId) == 0 {
|
||||||
|
http.Error(rw, "required fields are missing", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if combination of (job_id, cluster_id, start_time) already exists:
|
||||||
|
rows, err := api.DB.Query(`SELECT job.id FROM job WHERE job.job_id = ? AND job.cluster_id = ? AND job.start_time = ?`,
|
||||||
|
req.JobId, req.ClusterId, req.StartTime)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if rows.Next() {
|
||||||
|
var id int64 = -1
|
||||||
|
rows.Scan(&id)
|
||||||
|
http.Error(rw, fmt.Sprintf("a job with that job_id, cluster_id and start_time already exists (database id: %d)", id), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := api.DB.Exec(
|
||||||
|
`INSERT INTO job (job_id, user_id, project_id, cluster_id, start_time, duration, job_state, num_nodes, node_list, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);`,
|
||||||
|
req.JobId, req.UserId, req.ProjectId, req.ClusterId, req.StartTime, 0, model.JobStateRunning, len(req.Nodes), strings.Join(req.Nodes, ","), req.MetaData)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
id, err := res.LastInsertId()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("new job (id: %d): clusterId=%s, jobId=%d, userId=%s, startTime=%d, nodes=%v\n", id, req.ClusterId, req.JobId, req.UserId, req.StartTime, req.Nodes)
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusCreated)
|
||||||
|
json.NewEncoder(rw).Encode(StartJobApiRespone{
|
||||||
|
DBID: id,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// stopJob handles POST/PUT /api/jobs/stop_job/ and /api/jobs/stop_job/{id}:
// it marks a running job as completed and archives its metric data.
//
// The job is located either by the database id in the URL or, if absent, by
// the (jobId, clusterId, startTime) triple from the StopJobApiRequest body.
// Responds 400 on a malformed body or invalid stop time, 404 if the job is
// not found. With AsyncArchiving the client gets 200 plus a
// StopJobApiRespone immediately and archiving runs in a goroutine;
// otherwise archiving runs inline and the full job is returned.
func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
	req := StopJobApiRequest{}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(rw, err.Error(), http.StatusBadRequest)
		return
	}

	var err error
	var job *model.Job
	id, ok := mux.Vars(r)["id"]
	if ok {
		// Database id present in the URL: look the job up directly.
		job, err = graph.ScanJob(sq.Select(graph.JobTableCols...).From("job").Where("job.id = ?", id).RunWith(api.DB).QueryRow())
	} else {
		// No id: identify the job by the optional triple from the body.
		job, err = graph.ScanJob(sq.Select(graph.JobTableCols...).From("job").
			Where("job.job_id = ?", req.JobId).
			Where("job.cluster_id = ?", req.ClusterId).
			Where("job.start_time = ?", req.StartTime).
			RunWith(api.DB).QueryRow())
	}
	if err != nil {
		http.Error(rw, err.Error(), http.StatusNotFound)
		return
	}

	// Only running jobs can be stopped, and only with a stop time after start.
	if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != model.JobStateRunning {
		http.Error(rw, "stop_time must be larger than start_time and only running jobs can be stopped", http.StatusBadRequest)
		return
	}

	// doArchiving writes the job's metric data to the archive and updates the
	// database row (state, duration, metric averages). It captures req and is
	// run either inline or, with AsyncArchiving, in a background goroutine.
	doArchiving := func(job *model.Job, ctx context.Context) error {
		job.Duration = int(req.StopTime - job.StartTime.Unix())
		jobMeta, err := metricdata.ArchiveJob(job, ctx)
		if err != nil {
			log.Printf("archiving job (id: %s) failed: %s\n", job.ID, err.Error())
			return err
		}

		// getAvg maps a missing statistic to SQL NULL instead of 0.
		getAvg := func(metric string) sql.NullFloat64 {
			stats, ok := jobMeta.Statistics[metric]
			if !ok {
				return sql.NullFloat64{Valid: false}
			}
			return sql.NullFloat64{Valid: true, Float64: stats.Avg}
		}

		if _, err := api.DB.Exec(
			`UPDATE job SET
			job_state = ?, duration = ?,
			flops_any_avg = ?, mem_bw_avg = ?, net_bw_avg = ?, file_bw_avg = ?, load_avg = ?
			WHERE job.id = ?`,
			model.JobStateCompleted, job.Duration,
			getAvg("flops_any"), getAvg("mem_bw"), getAvg("net_bw"), getAvg("file_bw"), getAvg("load"),
			job.ID); err != nil {
			log.Printf("archiving job (id: %s) failed: %s\n", job.ID, err.Error())
			return err
		}

		log.Printf("job stopped and archived (id: %s)\n", job.ID)
		return nil
	}

	log.Printf("archiving job... (id: %s): clusterId=%s, jobId=%s, userId=%s, startTime=%s, nodes=%v\n", job.ID, job.ClusterID, job.JobID, job.UserID, job.StartTime, job.Nodes)
	if api.AsyncArchiving {
		// Respond first, then archive in the background. The goroutine must
		// not use r.Context(), which dies when this handler returns; errors
		// are only logged inside doArchiving.
		rw.Header().Add("Content-Type", "application/json")
		rw.WriteHeader(http.StatusOK)
		json.NewEncoder(rw).Encode(StopJobApiRespone{
			DBID: job.ID,
		})
		go doArchiving(job, context.Background())
	} else {
		err := doArchiving(job, r.Context())
		if err != nil {
			http.Error(rw, err.Error(), http.StatusInternalServerError)
		} else {
			rw.Header().Add("Content-Type", "application/json")
			rw.WriteHeader(http.StatusOK)
			json.NewEncoder(rw).Encode(job)
		}
	}
}
|
@ -137,9 +137,9 @@ func loadAveragesFromArchive(job *model.Job, metrics []string, data [][]schema.F
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Writes a running job to the job-archive
|
// Writes a running job to the job-archive
|
||||||
func ArchiveJob(job *model.Job, ctx context.Context) error {
|
func ArchiveJob(job *model.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||||
if job.State != model.JobStateRunning {
|
if job.State != model.JobStateRunning {
|
||||||
return errors.New("cannot archive job that is not running")
|
return nil, errors.New("cannot archive job that is not running")
|
||||||
}
|
}
|
||||||
|
|
||||||
allMetrics := make([]string, 0)
|
allMetrics := make([]string, 0)
|
||||||
@ -149,7 +149,7 @@ func ArchiveJob(job *model.Job, ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
jobData, err := LoadData(job, allMetrics, ctx)
|
jobData, err := LoadData(job, allMetrics, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
tags := []struct {
|
tags := []struct {
|
||||||
@ -195,39 +195,46 @@ func ArchiveJob(job *model.Job, ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the file based archive is disabled,
|
||||||
|
// only return the JobMeta structure as the
|
||||||
|
// statistics in there are needed.
|
||||||
|
if !useArchive {
|
||||||
|
return metaData, nil
|
||||||
|
}
|
||||||
|
|
||||||
dirPath, err := getPath(job, "", false)
|
dirPath, err := getPath(job, "", false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.MkdirAll(dirPath, 0777); err != nil {
|
if err := os.MkdirAll(dirPath, 0777); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err := os.Create(path.Join(dirPath, "meta.json"))
|
f, err := os.Create(path.Join(dirPath, "meta.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
writer := bufio.NewWriter(f)
|
writer := bufio.NewWriter(f)
|
||||||
if err := json.NewEncoder(writer).Encode(metaData); err != nil {
|
if err := json.NewEncoder(writer).Encode(metaData); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
if err := writer.Flush(); err != nil {
|
if err := writer.Flush(); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err = os.Create(path.Join(dirPath, "data.json"))
|
f, err = os.Create(path.Join(dirPath, "data.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
writer = bufio.NewWriter(f)
|
writer = bufio.NewWriter(f)
|
||||||
if err := json.NewEncoder(writer).Encode(jobData); err != nil {
|
if err := json.NewEncoder(writer).Encode(jobData); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
if err := writer.Flush(); err != nil {
|
if err := writer.Flush(); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return f.Close()
|
return metaData, f.Close()
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,10 @@ var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepos
|
|||||||
|
|
||||||
var JobArchivePath string
|
var JobArchivePath string
|
||||||
|
|
||||||
func Init(jobArchivePath string) error {
|
var useArchive bool
|
||||||
|
|
||||||
|
func Init(jobArchivePath string, disableArchive bool) error {
|
||||||
|
useArchive = !disableArchive
|
||||||
JobArchivePath = jobArchivePath
|
JobArchivePath = jobArchivePath
|
||||||
for _, cluster := range config.Clusters {
|
for _, cluster := range config.Clusters {
|
||||||
if cluster.MetricDataRepository != nil {
|
if cluster.MetricDataRepository != nil {
|
||||||
@ -55,7 +58,7 @@ func Init(jobArchivePath string) error {
|
|||||||
|
|
||||||
// Fetches the metric data for a job.
|
// Fetches the metric data for a job.
|
||||||
func LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
|
func LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
|
||||||
if job.State == model.JobStateRunning {
|
if job.State == model.JobStateRunning || !useArchive {
|
||||||
repo, ok := metricDataRepos[job.ClusterID]
|
repo, ok := metricDataRepos[job.ClusterID]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.ClusterID)
|
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.ClusterID)
|
||||||
@ -83,7 +86,7 @@ func LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.Job
|
|||||||
|
|
||||||
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
||||||
func LoadAverages(job *model.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
|
func LoadAverages(job *model.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
|
||||||
if job.State != model.JobStateRunning {
|
if job.State != model.JobStateRunning && useArchive {
|
||||||
return loadAveragesFromArchive(job, metrics, data)
|
return loadAveragesFromArchive(job, metrics, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -120,6 +123,12 @@ func LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx
|
|||||||
return nil, fmt.Errorf("no metric data repository configured for '%s'", clusterId)
|
return nil, fmt.Errorf("no metric data repository configured for '%s'", clusterId)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if metrics == nil {
|
||||||
|
for _, m := range config.GetClusterConfig(clusterId).MetricConfig {
|
||||||
|
metrics = append(metrics, m.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
data, err := repo.LoadNodeData(clusterId, metrics, nodes, from, to, ctx)
|
data, err := repo.LoadNodeData(clusterId, metrics, nodes, from, to, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
142
rest-api.go
142
rest-api.go
@ -1,142 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"log"
|
|
||||||
"net/http"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
|
||||||
sq "github.com/Masterminds/squirrel"
|
|
||||||
"github.com/gorilla/mux"
|
|
||||||
)
|
|
||||||
|
|
||||||
type StartJobApiRequest struct {
|
|
||||||
JobId int64 `json:"jobId"`
|
|
||||||
UserId string `json:"userId"`
|
|
||||||
ClusterId string `json:"clusterId"`
|
|
||||||
StartTime int64 `json:"startTime"`
|
|
||||||
MetaData string `json:"metaData"`
|
|
||||||
ProjectId string `json:"projectId"`
|
|
||||||
Nodes []string `json:"nodes"`
|
|
||||||
NodeList string `json:"nodeList"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type StartJobApiRespone struct {
|
|
||||||
DBID int64 `json:"id"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type StopJobApiRequest struct {
|
|
||||||
// JobId, ClusterId and StartTime are optional.
|
|
||||||
// They are only used if no database id was provided.
|
|
||||||
JobId *string `json:"jobId"`
|
|
||||||
ClusterId *string `json:"clusterId"`
|
|
||||||
StartTime *int64 `json:"startTime"`
|
|
||||||
|
|
||||||
StopTime int64 `json:"stopTime"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type StopJobApiRespone struct {
|
|
||||||
DBID string `json:"id"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func startJob(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
req := StartJobApiRequest{}
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.GetClusterConfig(req.ClusterId) == nil {
|
|
||||||
http.Error(rw, fmt.Sprintf("cluster '%s' does not exist", req.ClusterId), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if req.Nodes == nil {
|
|
||||||
req.Nodes = strings.Split(req.NodeList, "|")
|
|
||||||
if len(req.Nodes) == 1 {
|
|
||||||
req.Nodes = strings.Split(req.NodeList, ",")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(req.Nodes) == 0 || len(req.Nodes[0]) == 0 || len(req.UserId) == 0 {
|
|
||||||
http.Error(rw, "required fields are missing", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
res, err := db.Exec(
|
|
||||||
`INSERT INTO job (job_id, user_id, project_id, cluster_id, start_time, duration, job_state, num_nodes, node_list, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);`,
|
|
||||||
req.JobId, req.UserId, req.ProjectId, req.ClusterId, req.StartTime, 0, model.JobStateRunning, len(req.Nodes), strings.Join(req.Nodes, ","), req.MetaData)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
id, err := res.LastInsertId()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Printf("new job (id: %d): clusterId=%s, jobId=%d, userId=%s, startTime=%d, nodes=%v\n", id, req.ClusterId, req.JobId, req.UserId, req.StartTime, req.Nodes)
|
|
||||||
rw.Header().Add("Content-Type", "application/json")
|
|
||||||
rw.WriteHeader(http.StatusCreated)
|
|
||||||
json.NewEncoder(rw).Encode(StartJobApiRespone{
|
|
||||||
DBID: id,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func stopJob(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
req := StopJobApiRequest{}
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
var err error
|
|
||||||
var job *model.Job
|
|
||||||
id, ok := mux.Vars(r)["id"]
|
|
||||||
if ok {
|
|
||||||
job, err = graph.ScanJob(sq.Select(graph.JobTableCols...).From("job").Where("job.id = ?", id).RunWith(db).QueryRow())
|
|
||||||
} else {
|
|
||||||
job, err = graph.ScanJob(sq.Select(graph.JobTableCols...).From("job").
|
|
||||||
Where("job.job_id = ?", req.JobId).
|
|
||||||
Where("job.cluster_id = ?", req.ClusterId).
|
|
||||||
Where("job.start_time = ?", req.StartTime).
|
|
||||||
RunWith(db).QueryRow())
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != model.JobStateRunning {
|
|
||||||
http.Error(rw, "stop_time must be larger than start_time and only running jobs can be stopped", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
job.Duration = int(req.StopTime - job.StartTime.Unix())
|
|
||||||
|
|
||||||
if err := metricdata.ArchiveJob(job, r.Context()); err != nil {
|
|
||||||
log.Printf("archiving job (id: %s) failed: %s\n", job.ID, err.Error())
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, err := db.Exec(
|
|
||||||
`UPDATE job SET job_state = ?, duration = ? WHERE job.id = ?`,
|
|
||||||
model.JobStateCompleted, job.Duration, job.ID); err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Printf("job stoped and archived (id: %s): clusterId=%s, jobId=%s, userId=%s, startTime=%s, nodes=%v\n", job.ID, job.ClusterID, job.JobID, job.UserID, job.StartTime, job.Nodes)
|
|
||||||
rw.Header().Add("Content-Type", "application/json")
|
|
||||||
rw.WriteHeader(http.StatusCreated)
|
|
||||||
json.NewEncoder(rw).Encode(StopJobApiRespone{
|
|
||||||
DBID: job.ID,
|
|
||||||
})
|
|
||||||
}
|
|
65
server.go
65
server.go
@ -10,6 +10,7 @@ import (
|
|||||||
|
|
||||||
"github.com/99designs/gqlgen/graphql/handler"
|
"github.com/99designs/gqlgen/graphql/handler"
|
||||||
"github.com/99designs/gqlgen/graphql/playground"
|
"github.com/99designs/gqlgen/graphql/playground"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/api"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
||||||
@ -24,16 +25,40 @@ import (
|
|||||||
|
|
||||||
var db *sqlx.DB
|
var db *sqlx.DB
|
||||||
|
|
||||||
|
// Format of the configurartion (file). See below for the defaults.
|
||||||
type ProgramConfig struct {
|
type ProgramConfig struct {
|
||||||
Addr string `json:"addr"`
|
// Address where the http (or https) server will listen on (for example: 'localhost:80').
|
||||||
DisableAuthentication bool `json:"disable-authentication"`
|
Addr string `json:"addr"`
|
||||||
StaticFiles string `json:"static-files"`
|
|
||||||
DB string `json:"db"`
|
// Disable authentication (for everything: API, Web-UI, ...)
|
||||||
JobArchive string `json:"job-archive"`
|
DisableAuthentication bool `json:"disable-authentication"`
|
||||||
LdapConfig *auth.LdapConfig `json:"ldap"`
|
|
||||||
HttpsCertFile string `json:"https-cert-file"`
|
// Folder where static assets can be found, will be served directly
|
||||||
HttpsKeyFile string `json:"https-key-file"`
|
StaticFiles string `json:"static-files"`
|
||||||
UiDefaults map[string]interface{} `json:"ui-defaults"`
|
|
||||||
|
// Currently only SQLite3 ist supported, so this should be a filename
|
||||||
|
DB string `json:"db"`
|
||||||
|
|
||||||
|
// Path to the job-archive
|
||||||
|
JobArchive string `json:"job-archive"`
|
||||||
|
|
||||||
|
// Make the /api/jobs/stop_job endpoint do the heavy work in the background.
|
||||||
|
AsyncArchiving bool `json:"async-archive"`
|
||||||
|
|
||||||
|
// Keep all metric data in the metric data repositories,
|
||||||
|
// do not write to the job-archive.
|
||||||
|
DisableArchive bool `json:"disable-archive"`
|
||||||
|
|
||||||
|
// For LDAP Authentication and user syncronisation.
|
||||||
|
LdapConfig *auth.LdapConfig `json:"ldap"`
|
||||||
|
|
||||||
|
// If both those options are not empty, use HTTPS using those certificates.
|
||||||
|
HttpsCertFile string `json:"https-cert-file"`
|
||||||
|
HttpsKeyFile string `json:"https-key-file"`
|
||||||
|
|
||||||
|
// If overwriten, at least all the options in the defaults below must
|
||||||
|
// be provided! Most options here can be overwritten by the user.
|
||||||
|
UiDefaults map[string]interface{} `json:"ui-defaults"`
|
||||||
}
|
}
|
||||||
|
|
||||||
var programConfig ProgramConfig = ProgramConfig{
|
var programConfig ProgramConfig = ProgramConfig{
|
||||||
@ -42,6 +67,8 @@ var programConfig ProgramConfig = ProgramConfig{
|
|||||||
StaticFiles: "./frontend/public",
|
StaticFiles: "./frontend/public",
|
||||||
DB: "./var/job.db",
|
DB: "./var/job.db",
|
||||||
JobArchive: "./var/job-archive",
|
JobArchive: "./var/job-archive",
|
||||||
|
AsyncArchiving: true,
|
||||||
|
DisableArchive: false,
|
||||||
LdapConfig: &auth.LdapConfig{
|
LdapConfig: &auth.LdapConfig{
|
||||||
Url: "ldap://localhost",
|
Url: "ldap://localhost",
|
||||||
UserBase: "ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
|
UserBase: "ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
|
||||||
@ -92,11 +119,15 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
db, err = sqlx.Open("sqlite3", programConfig.DB)
|
// This might need to change for other databases:
|
||||||
|
db, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", programConfig.DB))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only for sqlite, not needed for any other database:
|
||||||
|
db.SetMaxOpenConns(1)
|
||||||
|
|
||||||
// Initialize sub-modules...
|
// Initialize sub-modules...
|
||||||
|
|
||||||
if !programConfig.DisableAuthentication {
|
if !programConfig.DisableAuthentication {
|
||||||
@ -126,7 +157,7 @@ func main() {
|
|||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := metricdata.Init(programConfig.JobArchive); err != nil {
|
if err := metricdata.Init(programConfig.JobArchive, programConfig.DisableArchive); err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -145,6 +176,11 @@ func main() {
|
|||||||
resolver := &graph.Resolver{DB: db}
|
resolver := &graph.Resolver{DB: db}
|
||||||
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||||
graphQLPlayground := playground.Handler("GraphQL playground", "/query")
|
graphQLPlayground := playground.Handler("GraphQL playground", "/query")
|
||||||
|
restApi := &api.RestApi{
|
||||||
|
DB: db,
|
||||||
|
Resolver: resolver,
|
||||||
|
AsyncArchiving: programConfig.AsyncArchiving,
|
||||||
|
}
|
||||||
|
|
||||||
handleGetLogin := func(rw http.ResponseWriter, r *http.Request) {
|
handleGetLogin := func(rw http.ResponseWriter, r *http.Request) {
|
||||||
templates.Render(rw, r, "login", &templates.Page{
|
templates.Render(rw, r, "login", &templates.Page{
|
||||||
@ -170,9 +206,7 @@ func main() {
|
|||||||
secured.Use(auth.Auth)
|
secured.Use(auth.Auth)
|
||||||
}
|
}
|
||||||
secured.Handle("/query", graphQLEndpoint)
|
secured.Handle("/query", graphQLEndpoint)
|
||||||
secured.HandleFunc("/api/jobs/start_job/", startJob).Methods(http.MethodPost)
|
|
||||||
secured.HandleFunc("/api/jobs/stop_job/", stopJob).Methods(http.MethodPost, http.MethodPut)
|
|
||||||
secured.HandleFunc("/api/jobs/stop_job/{id:[0-9]+}", stopJob).Methods(http.MethodPost, http.MethodPut)
|
|
||||||
secured.HandleFunc("/config.json", config.ServeConfig).Methods(http.MethodGet)
|
secured.HandleFunc("/config.json", config.ServeConfig).Methods(http.MethodGet)
|
||||||
|
|
||||||
secured.HandleFunc("/", func(rw http.ResponseWriter, r *http.Request) {
|
secured.HandleFunc("/", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
@ -201,12 +235,13 @@ func main() {
|
|||||||
})
|
})
|
||||||
|
|
||||||
monitoringRoutes(secured, resolver)
|
monitoringRoutes(secured, resolver)
|
||||||
|
restApi.MountRoutes(secured)
|
||||||
|
|
||||||
r.PathPrefix("/").Handler(http.FileServer(http.Dir(programConfig.StaticFiles)))
|
r.PathPrefix("/").Handler(http.FileServer(http.Dir(programConfig.StaticFiles)))
|
||||||
handler := handlers.CORS(
|
handler := handlers.CORS(
|
||||||
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization"}),
|
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization"}),
|
||||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||||
handlers.AllowedOrigins([]string{"*"}))(handlers.LoggingHandler(os.Stdout, r))
|
handlers.AllowedOrigins([]string{"*"}))(handlers.LoggingHandler(os.Stdout, handlers.CompressHandler(r)))
|
||||||
|
|
||||||
// Start http or https server
|
// Start http or https server
|
||||||
if programConfig.HttpsCertFile != "" && programConfig.HttpsKeyFile != "" {
|
if programConfig.HttpsCertFile != "" && programConfig.HttpsKeyFile != "" {
|
||||||
|
Loading…
Reference in New Issue
Block a user