mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-04-19 02:05:54 +02:00
commit
ff24d946fd
4
.env
Normal file
4
.env
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
export JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||||
|
export JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||||
|
export SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||||
|
export LDAP_ADMIN_PASSWORD="mashup"
|
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -1,3 +1,3 @@
|
|||||||
[submodule "frontend"]
|
[submodule "frontend"]
|
||||||
path = frontend
|
path = frontend
|
||||||
url = git@github.com:ClusterCockpit/cc-svelte-datatable.git
|
url = git@github.com:ClusterCockpit/cc-frontend.git
|
||||||
|
18
README.md
18
README.md
@ -1,7 +1,11 @@
|
|||||||
# ClusterCockpit with a Golang backend (Only supports archived jobs)
|
# ClusterCockpit with a Golang backend
|
||||||
|
|
||||||
|
__*DOES NOT WORK WITH CURRENT FRONTEND*__
|
||||||
|
|
||||||
[](https://github.com/ClusterCockpit/cc-jobarchive/actions/workflows/test.yml)
|
[](https://github.com/ClusterCockpit/cc-jobarchive/actions/workflows/test.yml)
|
||||||
|
|
||||||
|
Create your job-archive according to [this specification](https://github.com/ClusterCockpit/cc-specifications). At least one cluster with a valid `cluster.json` file is required. Having no jobs in the job-archive at all is fine. You may use the sample job-archive available for download [in cc-docker/develop](https://github.com/ClusterCockpit/cc-docker/tree/develop).
|
||||||
|
|
||||||
### Run server
|
### Run server
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
@ -27,13 +31,23 @@ touch ./var/job.db
|
|||||||
# This will first initialize the job.db database by traversing all
|
# This will first initialize the job.db database by traversing all
|
||||||
# `meta.json` files in the job-archive. After that, a HTTP server on
|
# `meta.json` files in the job-archive. After that, a HTTP server on
|
||||||
# the port 8080 will be running. The `--init-db` is only needed the first time.
|
# the port 8080 will be running. The `--init-db` is only needed the first time.
|
||||||
./cc-jobarchive --init-db
|
./cc-jobarchive --init-db --add-user <your-username>:admin:<your-password>
|
||||||
|
|
||||||
# Show other options:
|
# Show other options:
|
||||||
./cc-jobarchive --help
|
./cc-jobarchive --help
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
A config file in the JSON format can be provided using `--config` to override the defaults. Look at the beginning of `server.go` for the defaults and consequently the format of the configuration file.
|
||||||
|
|
||||||
### Update GraphQL schema
|
### Update GraphQL schema
|
||||||
|
|
||||||
This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL API. The schema can be found in `./graph/schema.graphqls`. After changing it, you need to run `go run github.com/99designs/gqlgen` which will update `graph/model`. In case new resolvers are needed, they will be inserted into `graph/schema.resolvers.go`, where you will need to implement them.
|
This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL API. The schema can be found in `./graph/schema.graphqls`. After changing it, you need to run `go run github.com/99designs/gqlgen` which will update `graph/model`. In case new resolvers are needed, they will be inserted into `graph/schema.resolvers.go`, where you will need to implement them.
|
||||||
|
|
||||||
|
### TODO
|
||||||
|
|
||||||
|
- [ ] Documentation
|
||||||
|
- [ ] Write more TODOs
|
||||||
|
- [ ] Caching
|
||||||
|
- [ ] Generate JWTs based on the provided keys
|
||||||
|
171
api/openapi.yaml
Normal file
171
api/openapi.yaml
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
#
|
||||||
|
# ClusterCockpit's API spec can be exported via:
|
||||||
|
# docker exec -it cc-php php bin/console api:openapi:export --yaml
|
||||||
|
#
|
||||||
|
# This spec is written by hand and hopefully up to date with the API.
|
||||||
|
#
|
||||||
|
|
||||||
|
openapi: 3.0.3
|
||||||
|
info:
|
||||||
|
title: 'ClusterCockpit REST API'
|
||||||
|
description: 'API for batch job control'
|
||||||
|
version: 0.0.2
|
||||||
|
servers:
|
||||||
|
- url: /
|
||||||
|
description: ''
|
||||||
|
paths:
|
||||||
|
'/api/jobs/{id}':
|
||||||
|
get:
|
||||||
|
operationId: 'getJob'
|
||||||
|
summary: 'Get job resource'
|
||||||
|
parameters:
|
||||||
|
- name: id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema: { type: integer }
|
||||||
|
description: 'Database ID (Resource Identifier)'
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: 'Job resource'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
404:
|
||||||
|
description: 'Resource not found'
|
||||||
|
'/api/jobs/tag_job/{id}':
|
||||||
|
post:
|
||||||
|
operationId: 'tagJob'
|
||||||
|
summary: 'Add a tag to a job'
|
||||||
|
parameters:
|
||||||
|
- name: id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema: { type: integer }
|
||||||
|
description: 'Job ID'
|
||||||
|
requestBody:
|
||||||
|
description: 'Array of tags to add'
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/Tag'
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: 'Job resource'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
404:
|
||||||
|
description: 'Job or tag does not exist'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
'/api/jobs/start_job/':
|
||||||
|
post:
|
||||||
|
operationId: 'startJob'
|
||||||
|
summary: 'Add a newly started job'
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
responses:
|
||||||
|
201:
|
||||||
|
description: 'Job successfully started'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: integer
|
||||||
|
description: 'The database ID assigned to this job'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
422:
|
||||||
|
description: 'The combination of jobId, clusterId and startTime does already exist'
|
||||||
|
'/api/jobs/stop_job/':
|
||||||
|
post:
|
||||||
|
operationId: stopJobViaJobID
|
||||||
|
summary: 'Mark a job as stopped. Which job to stop is specified by the request body.'
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
required: [jobId, cluster, startTime, stopTime]
|
||||||
|
properties:
|
||||||
|
jobId: { type: integer }
|
||||||
|
cluster: { type: string }
|
||||||
|
startTime: { type: integer }
|
||||||
|
stopTime: { type: integer }
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: 'Job resource'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
404:
|
||||||
|
description: 'Resource not found'
|
||||||
|
'/api/jobs/stop_job/{id}':
|
||||||
|
post:
|
||||||
|
operationId: 'stopJobViaDBID'
|
||||||
|
summary: 'Mark a job as stopped.'
|
||||||
|
parameters:
|
||||||
|
- name: id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema: { type: integer }
|
||||||
|
description: 'Database ID (Resource Identifier)'
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
required: [stopTime]
|
||||||
|
properties:
|
||||||
|
stopTime: { type: integer }
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: 'Job resource'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
404:
|
||||||
|
description: 'Resource not found'
|
||||||
|
components:
|
||||||
|
schemas:
|
||||||
|
Tag:
|
||||||
|
description: 'A job tag'
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
description: 'Database ID'
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
description: 'Tag type'
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
description: 'Tag name'
|
||||||
|
Job:
|
||||||
|
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
|
||||||
|
securitySchemes:
|
||||||
|
bearerAuth:
|
||||||
|
type: http
|
||||||
|
scheme: bearer
|
||||||
|
bearerFormat: JWT
|
||||||
|
security:
|
||||||
|
- bearerAuth: [] # Applies `bearerAuth` globally
|
340
api/rest.go
Normal file
340
api/rest.go
Normal file
@ -0,0 +1,340 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
"github.com/jmoiron/sqlx"
|
||||||
|
)
|
||||||
|
|
||||||
|
type RestApi struct {
|
||||||
|
DB *sqlx.DB
|
||||||
|
Resolver *graph.Resolver
|
||||||
|
AsyncArchiving bool
|
||||||
|
MachineStateDir string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) MountRoutes(r *mux.Router) {
|
||||||
|
r = r.PathPrefix("/api").Subrouter()
|
||||||
|
r.StrictSlash(true)
|
||||||
|
|
||||||
|
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
|
||||||
|
r.HandleFunc("/jobs/stop_job/", api.stopJob).Methods(http.MethodPost, http.MethodPut)
|
||||||
|
r.HandleFunc("/jobs/stop_job/{id}", api.stopJob).Methods(http.MethodPost, http.MethodPut)
|
||||||
|
|
||||||
|
r.HandleFunc("/jobs/{id}", api.getJob).Methods(http.MethodGet)
|
||||||
|
r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch)
|
||||||
|
|
||||||
|
r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet)
|
||||||
|
r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost)
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartJobApiRespone is the JSON body sent by startJob after a job
// was inserted successfully.
type StartJobApiRespone struct {
	// DBID is the database id assigned to the newly inserted job.
	DBID int64 `json:"id"`
}
|
||||||
|
|
||||||
|
type StopJobApiRequest struct {
|
||||||
|
// JobId, ClusterId and StartTime are optional.
|
||||||
|
// They are only used if no database id was provided.
|
||||||
|
JobId *string `json:"jobId"`
|
||||||
|
Cluster *string `json:"cluster"`
|
||||||
|
StartTime *int64 `json:"startTime"`
|
||||||
|
|
||||||
|
// Payload
|
||||||
|
StopTime int64 `json:"stopTime"`
|
||||||
|
State schema.JobState `json:"jobState"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// TagJobApiRequest is the JSON body expected by tagJob: a list of tags,
// each identified by its type and name, that should be added to a job.
type TagJobApiRequest []*struct {
	Name string `json:"name"`
	Type string `json:"type"`
}
|
||||||
|
|
||||||
|
func (api *RestApi) getJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id := mux.Vars(r)["id"]
|
||||||
|
|
||||||
|
job, err := api.Resolver.Query().Job(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.Resolver.Job().Tags(r.Context(), job)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id := mux.Vars(r)["id"]
|
||||||
|
job, err := api.Resolver.Query().Job(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.Resolver.Job().Tags(r.Context(), job)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var req TagJobApiRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tag := range req {
|
||||||
|
var tagId int64
|
||||||
|
if err := sq.Select("id").From("tag").
|
||||||
|
Where("tag.tag_type = ?", tag.Type).Where("tag.tag_name = ?", tag.Name).
|
||||||
|
RunWith(api.DB).QueryRow().Scan(&tagId); err != nil {
|
||||||
|
http.Error(rw, fmt.Sprintf("the tag '%s:%s' does not exist", tag.Type, tag.Name), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := api.DB.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, job.ID, tagId); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags = append(job.Tags, &schema.Tag{
|
||||||
|
ID: tagId,
|
||||||
|
Type: tag.Type,
|
||||||
|
Name: tag.Name,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
req := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.GetClusterConfig(req.Cluster) == nil {
|
||||||
|
http.Error(rw, fmt.Sprintf("cluster '%s' does not exist", req.Cluster), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(req.Resources) == 0 || len(req.User) == 0 || req.NumNodes == 0 {
|
||||||
|
http.Error(rw, "required fields are missing", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if combination of (job_id, cluster_id, start_time) already exists:
|
||||||
|
rows, err := api.DB.Query(`SELECT job.id FROM job WHERE job.job_id = ? AND job.cluster = ? AND job.start_time = ?`,
|
||||||
|
req.JobID, req.Cluster, req.StartTime)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if rows.Next() {
|
||||||
|
var id int64 = -1
|
||||||
|
rows.Scan(&id)
|
||||||
|
http.Error(rw, fmt.Sprintf("a job with that job_id, cluster_id and start_time already exists (database id: %d)", id), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
req.RawResources, err = json.Marshal(req.Resources)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := api.DB.NamedExec(`INSERT INTO job (
|
||||||
|
job_id, user, project, cluster, partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||||
|
exclusive, monitoring_status, smt, job_state, start_time, duration, resources, meta_data
|
||||||
|
) VALUES (
|
||||||
|
:job_id, :user, :project, :cluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||||
|
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :resources, :meta_data
|
||||||
|
);`, req)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
id, err := res.LastInsertId()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d\n", id, req.Cluster, req.JobID, req.User, req.StartTime)
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusCreated)
|
||||||
|
json.NewEncoder(rw).Encode(StartJobApiRespone{
|
||||||
|
DBID: id,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
req := StopJobApiRequest{}
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
var sql string
|
||||||
|
var args []interface{}
|
||||||
|
id, ok := mux.Vars(r)["id"]
|
||||||
|
if ok {
|
||||||
|
sql, args, err = sq.Select(schema.JobColumns...).From("job").Where("job.id = ?", id).ToSql()
|
||||||
|
} else {
|
||||||
|
sql, args, err = sq.Select(schema.JobColumns...).From("job").
|
||||||
|
Where("job.job_id = ?", req.JobId).
|
||||||
|
Where("job.cluster = ?", req.Cluster).
|
||||||
|
Where("job.start_time = ?", req.StartTime).ToSql()
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
job, err := schema.ScanJob(api.DB.QueryRowx(sql, args...))
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != schema.JobStateRunning {
|
||||||
|
http.Error(rw, "stop_time must be larger than start_time and only running jobs can be stopped", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if req.State != "" && !req.State.Valid() {
|
||||||
|
http.Error(rw, fmt.Sprintf("invalid job state: '%s'", req.State), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
req.State = schema.JobStateCompleted
|
||||||
|
}
|
||||||
|
|
||||||
|
doArchiving := func(job *schema.Job, ctx context.Context) error {
|
||||||
|
job.Duration = int32(req.StopTime - job.StartTime.Unix())
|
||||||
|
jobMeta, err := metricdata.ArchiveJob(job, ctx)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("archiving job (dbid: %d) failed: %s\n", job.ID, err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt := sq.Update("job").
|
||||||
|
Set("job_state", req.State).
|
||||||
|
Set("duration", job.Duration).
|
||||||
|
Where("job.id = ?", job.ID)
|
||||||
|
|
||||||
|
for metric, stats := range jobMeta.Statistics {
|
||||||
|
switch metric {
|
||||||
|
case "flops_any":
|
||||||
|
stmt = stmt.Set("flops_any_avg", stats.Avg)
|
||||||
|
case "mem_used":
|
||||||
|
stmt = stmt.Set("mem_used_max", stats.Max)
|
||||||
|
case "mem_bw":
|
||||||
|
stmt = stmt.Set("mem_bw_avg", stats.Avg)
|
||||||
|
case "load":
|
||||||
|
stmt = stmt.Set("load_avg", stats.Avg)
|
||||||
|
case "net_bw":
|
||||||
|
stmt = stmt.Set("net_bw_avg", stats.Avg)
|
||||||
|
case "file_bw":
|
||||||
|
stmt = stmt.Set("file_bw_avg", stats.Avg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sql, args, err := stmt.ToSql()
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("archiving job (dbid: %d) failed: %s\n", job.ID, err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := api.DB.Exec(sql, args...); err != nil {
|
||||||
|
log.Printf("archiving job (dbid: %d) failed: %s\n", job.ID, err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("job stopped and archived (dbid: %d)\n", job.ID)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s\n", job.ID, job.Cluster, job.JobID, job.User, job.StartTime)
|
||||||
|
if api.AsyncArchiving {
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
go doArchiving(job, context.Background())
|
||||||
|
} else {
|
||||||
|
err := doArchiving(job, r.Context())
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
} else {
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
if api.MachineStateDir == "" {
|
||||||
|
http.Error(rw, "not enabled", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
vars := mux.Vars(r)
|
||||||
|
cluster := vars["cluster"]
|
||||||
|
host := vars["host"]
|
||||||
|
dir := filepath.Join(api.MachineStateDir, cluster)
|
||||||
|
if err := os.MkdirAll(dir, 0755); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
filename := filepath.Join(dir, fmt.Sprintf("%s.json", host))
|
||||||
|
f, err := os.Create(filename)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
if _, err := io.Copy(f, r.Body); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.WriteHeader(http.StatusCreated)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) getMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
if api.MachineStateDir == "" {
|
||||||
|
http.Error(rw, "not enabled", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
vars := mux.Vars(r)
|
||||||
|
filename := filepath.Join(api.MachineStateDir, vars["cluster"], fmt.Sprintf("%s.json", vars["host"]))
|
||||||
|
|
||||||
|
// Sets the content-type and 'Last-Modified' Header and so on automatically
|
||||||
|
http.ServeFile(rw, r, filename)
|
||||||
|
}
|
339
auth/auth.go
Normal file
339
auth/auth.go
Normal file
@ -0,0 +1,339 @@
|
|||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/ed25519"
|
||||||
|
"crypto/rand"
|
||||||
|
"database/sql"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/templates"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
"github.com/golang-jwt/jwt/v4"
|
||||||
|
"github.com/gorilla/sessions"
|
||||||
|
"github.com/jmoiron/sqlx"
|
||||||
|
"golang.org/x/crypto/bcrypt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// User describes an account known to the authentication layer.
type User struct {
	// Username is the primary key of the account in the `user` table.
	Username string
	// Password is the bcrypt hash of the password when the user was
	// loaded via FetchUserFromDB.
	Password string
	// Name and Email are optional and empty if not set in the database.
	Name  string
	Email string
	// IsAdmin and IsAPIUser reflect the roles "ROLE_ADMIN" and
	// "ROLE_API" or the matching claims of a JWT.
	IsAdmin   bool
	IsAPIUser bool
	// ViaLdap is read from the `ldap` column; Login authenticates such
	// users via LDAP instead of the stored password hash.
	ViaLdap bool
}
|
||||||
|
|
||||||
|
type ContextKey string
|
||||||
|
|
||||||
|
const ContextUserKey ContextKey = "user"
|
||||||
|
|
||||||
|
var JwtPublicKey ed25519.PublicKey
|
||||||
|
var JwtPrivateKey ed25519.PrivateKey
|
||||||
|
|
||||||
|
var sessionStore *sessions.CookieStore
|
||||||
|
|
||||||
|
func Init(db *sqlx.DB, ldapConfig *LdapConfig) error {
|
||||||
|
_, err := db.Exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS user (
|
||||||
|
username varchar(255) PRIMARY KEY,
|
||||||
|
password varchar(255) DEFAULT NULL,
|
||||||
|
ldap tinyint DEFAULT 0,
|
||||||
|
name varchar(255) DEFAULT NULL,
|
||||||
|
roles varchar(255) DEFAULT NULL,
|
||||||
|
email varchar(255) DEFAULT NULL);`)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
sessKey := os.Getenv("SESSION_KEY")
|
||||||
|
if sessKey == "" {
|
||||||
|
log.Println("warning: environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||||
|
bytes := make([]byte, 32)
|
||||||
|
if _, err := rand.Read(bytes); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
sessionStore = sessions.NewCookieStore(bytes)
|
||||||
|
} else {
|
||||||
|
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
sessionStore = sessions.NewCookieStore(bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
|
||||||
|
if pubKey == "" || privKey == "" {
|
||||||
|
log.Println("warning: environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
||||||
|
} else {
|
||||||
|
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
JwtPublicKey = ed25519.PublicKey(bytes)
|
||||||
|
bytes, err = base64.StdEncoding.DecodeString(privKey)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
JwtPrivateKey = ed25519.PrivateKey(bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ldapConfig != nil {
|
||||||
|
if err := initLdap(ldapConfig); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// arg must be formated like this: "<username>:[admin]:<password>"
|
||||||
|
func AddUserToDB(db *sqlx.DB, arg string) error {
|
||||||
|
parts := strings.SplitN(arg, ":", 3)
|
||||||
|
if len(parts) != 3 || len(parts[0]) == 0 || len(parts[2]) == 0 || !(len(parts[1]) == 0 || parts[1] == "admin") {
|
||||||
|
return errors.New("invalid argument format")
|
||||||
|
}
|
||||||
|
|
||||||
|
password, err := bcrypt.GenerateFromPassword([]byte(parts[2]), bcrypt.DefaultCost)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
roles := "[]"
|
||||||
|
if parts[1] == "admin" {
|
||||||
|
roles = "[\"ROLE_ADMIN\"]"
|
||||||
|
}
|
||||||
|
if parts[1] == "api" {
|
||||||
|
roles = "[\"ROLE_API\"]"
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = sq.Insert("user").Columns("username", "password", "roles").Values(parts[0], string(password), roles).RunWith(db).Exec()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log.Printf("new user '%s' added (roles: %s)\n", parts[0], roles)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func DelUserFromDB(db *sqlx.DB, username string) error {
|
||||||
|
_, err := db.Exec(`DELETE FROM user WHERE user.username = ?`, username)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func FetchUserFromDB(db *sqlx.DB, username string) (*User, error) {
|
||||||
|
user := &User{Username: username}
|
||||||
|
var hashedPassword, name, rawRoles, email sql.NullString
|
||||||
|
if err := sq.Select("password", "ldap", "name", "roles", "email").From("user").
|
||||||
|
Where("user.username = ?", username).RunWith(db).
|
||||||
|
QueryRow().Scan(&hashedPassword, &user.ViaLdap, &name, &rawRoles, &email); err != nil {
|
||||||
|
return nil, fmt.Errorf("user '%s' not found (%s)", username, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
user.Password = hashedPassword.String
|
||||||
|
user.Name = name.String
|
||||||
|
user.Email = email.String
|
||||||
|
var roles []string
|
||||||
|
if rawRoles.Valid {
|
||||||
|
json.Unmarshal([]byte(rawRoles.String), &roles)
|
||||||
|
}
|
||||||
|
for _, role := range roles {
|
||||||
|
switch role {
|
||||||
|
case "ROLE_ADMIN":
|
||||||
|
user.IsAdmin = true
|
||||||
|
case "ROLE_API":
|
||||||
|
user.IsAPIUser = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return user, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle a POST request that should log the user in,
|
||||||
|
// starting a new session.
|
||||||
|
func Login(db *sqlx.DB) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
username, password := r.FormValue("username"), r.FormValue("password")
|
||||||
|
user, err := FetchUserFromDB(db, username)
|
||||||
|
if err == nil && user.ViaLdap && ldapAuthEnabled {
|
||||||
|
err = loginViaLdap(user, password)
|
||||||
|
} else if err == nil && !user.ViaLdap && user.Password != "" {
|
||||||
|
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(password)); e != nil {
|
||||||
|
err = fmt.Errorf("user '%s' provided the wrong password (%s)", username, e.Error())
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
err = errors.New("could not authenticate user")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("login failed: %s\n", err.Error())
|
||||||
|
rw.WriteHeader(http.StatusUnauthorized)
|
||||||
|
templates.Render(rw, r, "login.html", &templates.Page{
|
||||||
|
Title: "Login failed",
|
||||||
|
Login: &templates.LoginPage{
|
||||||
|
Error: "Username or password incorrect",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
session, err := sessionStore.New(r, "session")
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("session creation failed: %s\n", err.Error())
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
session.Values["username"] = user.Username
|
||||||
|
session.Values["is_admin"] = user.IsAdmin
|
||||||
|
if err := sessionStore.Save(r, rw, session); err != nil {
|
||||||
|
log.Printf("session save failed: %s\n", err.Error())
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("login successfull: user: %#v\n", user)
|
||||||
|
http.Redirect(rw, r, "/", http.StatusTemporaryRedirect)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
var ErrTokenInvalid error = errors.New("invalid token")
|
||||||
|
|
||||||
|
func authViaToken(r *http.Request) (*User, error) {
|
||||||
|
if JwtPublicKey == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
rawtoken := r.Header.Get("X-Auth-Token")
|
||||||
|
if rawtoken == "" {
|
||||||
|
rawtoken = r.Header.Get("Authorization")
|
||||||
|
prefix := "Bearer "
|
||||||
|
if !strings.HasPrefix(rawtoken, prefix) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
rawtoken = rawtoken[len(prefix):]
|
||||||
|
}
|
||||||
|
|
||||||
|
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
|
||||||
|
if t.Method != jwt.SigningMethodEdDSA {
|
||||||
|
return nil, errors.New("only Ed25519/EdDSA supported")
|
||||||
|
}
|
||||||
|
return JwtPublicKey, nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, ErrTokenInvalid
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := token.Claims.Valid(); err != nil {
|
||||||
|
return nil, ErrTokenInvalid
|
||||||
|
}
|
||||||
|
|
||||||
|
claims := token.Claims.(jwt.MapClaims)
|
||||||
|
sub, _ := claims["sub"].(string)
|
||||||
|
isAdmin, _ := claims["is_admin"].(bool)
|
||||||
|
isAPIUser, _ := claims["is_api"].(bool)
|
||||||
|
return &User{
|
||||||
|
Username: sub,
|
||||||
|
IsAdmin: isAdmin,
|
||||||
|
IsAPIUser: isAPIUser,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Authenticate the user and put a User object in the
|
||||||
|
// context of the request. If authentication fails,
|
||||||
|
// do not continue but send client to the login screen.
|
||||||
|
func Auth(next http.Handler) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := authViaToken(r)
|
||||||
|
if err == ErrTokenInvalid {
|
||||||
|
log.Printf("authentication failed: invalid token\n")
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if user != nil {
|
||||||
|
ctx := context.WithValue(r.Context(), ContextUserKey, user)
|
||||||
|
next.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
session, err := sessionStore.Get(r, "session")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if session.IsNew {
|
||||||
|
log.Printf("authentication failed: no session or jwt found\n")
|
||||||
|
|
||||||
|
rw.WriteHeader(http.StatusUnauthorized)
|
||||||
|
templates.Render(rw, r, "login.html", &templates.Page{
|
||||||
|
Title: "Authentication failed",
|
||||||
|
Login: &templates.LoginPage{
|
||||||
|
Error: "No valid session or JWT provided",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.WithValue(r.Context(), ContextUserKey, &User{
|
||||||
|
Username: session.Values["username"].(string),
|
||||||
|
IsAdmin: session.Values["is_admin"].(bool),
|
||||||
|
})
|
||||||
|
next.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate a new JWT that can be used for authentication
|
||||||
|
func ProvideJWT(user *User) (string, error) {
|
||||||
|
if JwtPrivateKey == nil {
|
||||||
|
return "", errors.New("environment variable 'JWT_PUBLIC_KEY' not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
tok := jwt.NewWithClaims(jwt.SigningMethodEdDSA, jwt.MapClaims{
|
||||||
|
"sub": user.Username,
|
||||||
|
"is_admin": user.IsAdmin,
|
||||||
|
"is_api": user.IsAPIUser,
|
||||||
|
})
|
||||||
|
|
||||||
|
return tok.SignedString(JwtPrivateKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetUser(ctx context.Context) *User {
|
||||||
|
x := ctx.Value(ContextUserKey)
|
||||||
|
if x == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return x.(*User)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clears the session cookie
|
||||||
|
func Logout(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
session, err := sessionStore.Get(r, "session")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !session.IsNew {
|
||||||
|
session.Options.MaxAge = -1
|
||||||
|
if err := sessionStore.Save(r, rw, session); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
templates.Render(rw, r, "login.html", &templates.Page{
|
||||||
|
Title: "Logout successful",
|
||||||
|
Login: &templates.LoginPage{
|
||||||
|
Info: "Logout successful",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
183
auth/ldap.go
Normal file
183
auth/ldap.go
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/tls"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/go-ldap/ldap/v3"
|
||||||
|
"github.com/jmoiron/sqlx"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LdapConfig holds the settings needed to talk to the LDAP server.
type LdapConfig struct {
	// Url is the address handed to ldap.DialURL.
	Url string `json:"url"`

	// UserBase is the base DN under which SyncWithLDAP searches for users.
	UserBase string `json:"user_base"`

	// SearchDN is the DN that pooled connections are bound as, together
	// with the password from the LDAP_ADMIN_PASSWORD environment variable.
	SearchDN string `json:"search_dn"`

	// UserBind is the DN template used for logins; every occurrence of
	// "{username}" is replaced by the name of the user logging in.
	UserBind string `json:"user_bind"`

	// UserFilter is the search filter SyncWithLDAP uses to list users.
	UserFilter string `json:"user_filter"`

	// TLS enables StartTLS on freshly dialed connections.
	TLS bool `json:"tls"`
}
|
||||||
|
|
||||||
|
var ldapAuthEnabled bool = false
|
||||||
|
var ldapConfig *LdapConfig
|
||||||
|
var ldapAdminPassword string
|
||||||
|
|
||||||
|
func initLdap(config *LdapConfig) error {
|
||||||
|
ldapAdminPassword = os.Getenv("LDAP_ADMIN_PASSWORD")
|
||||||
|
if ldapAdminPassword == "" {
|
||||||
|
log.Println("warning: environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync or authentication will not work)")
|
||||||
|
}
|
||||||
|
|
||||||
|
ldapConfig = config
|
||||||
|
ldapAuthEnabled = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var ldapConnectionsLock sync.Mutex
|
||||||
|
var ldapConnections []*ldap.Conn = []*ldap.Conn{}
|
||||||
|
|
||||||
|
// TODO: Add a connection pool or something like
|
||||||
|
// that so that connections can be reused/cached.
|
||||||
|
func getLdapConnection() (*ldap.Conn, error) {
|
||||||
|
ldapConnectionsLock.Lock()
|
||||||
|
n := len(ldapConnections)
|
||||||
|
if n > 0 {
|
||||||
|
conn := ldapConnections[n-1]
|
||||||
|
ldapConnections = ldapConnections[:n-1]
|
||||||
|
ldapConnectionsLock.Unlock()
|
||||||
|
return conn, nil
|
||||||
|
}
|
||||||
|
ldapConnectionsLock.Unlock()
|
||||||
|
|
||||||
|
conn, err := ldap.DialURL(ldapConfig.Url)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if ldapConfig.TLS {
|
||||||
|
if err := conn.StartTLS(&tls.Config{InsecureSkipVerify: true}); err != nil {
|
||||||
|
conn.Close()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := conn.Bind(ldapConfig.SearchDN, ldapAdminPassword); err != nil {
|
||||||
|
conn.Close()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return conn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func releaseConnection(conn *ldap.Conn) {
|
||||||
|
// Re-bind to the user we can run queries with
|
||||||
|
if err := conn.Bind(ldapConfig.SearchDN, ldapAdminPassword); err != nil {
|
||||||
|
conn.Close()
|
||||||
|
log.Printf("ldap error: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
ldapConnectionsLock.Lock()
|
||||||
|
defer ldapConnectionsLock.Unlock()
|
||||||
|
|
||||||
|
n := len(ldapConnections)
|
||||||
|
if n > 2 {
|
||||||
|
conn.Close()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ldapConnections = append(ldapConnections, conn)
|
||||||
|
}
|
||||||
|
|
||||||
|
func loginViaLdap(user *User, password string) error {
|
||||||
|
l, err := getLdapConnection()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer releaseConnection(l)
|
||||||
|
|
||||||
|
userDn := strings.Replace(ldapConfig.UserBind, "{username}", user.Username, -1)
|
||||||
|
if err := l.Bind(userDn, password); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
user.ViaLdap = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete users where user.ldap is 1 and that do not show up in the ldap search results.
|
||||||
|
// Add users to the users table that are new in the ldap search results.
|
||||||
|
func SyncWithLDAP(db *sqlx.DB) error {
|
||||||
|
if !ldapAuthEnabled {
|
||||||
|
return errors.New("ldap not enabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
const IN_DB int = 1
|
||||||
|
const IN_LDAP int = 2
|
||||||
|
const IN_BOTH int = 3
|
||||||
|
|
||||||
|
users := map[string]int{}
|
||||||
|
rows, err := db.Query(`SELECT username FROM user WHERE user.ldap = 1`)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for rows.Next() {
|
||||||
|
var username string
|
||||||
|
if err := rows.Scan(&username); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
users[username] = IN_DB
|
||||||
|
}
|
||||||
|
|
||||||
|
l, err := getLdapConnection()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer releaseConnection(l)
|
||||||
|
|
||||||
|
ldapResults, err := l.Search(ldap.NewSearchRequest(
|
||||||
|
ldapConfig.UserBase, ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
|
||||||
|
ldapConfig.UserFilter, []string{"dn", "uid", "gecos"}, nil))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
newnames := map[string]string{}
|
||||||
|
for _, entry := range ldapResults.Entries {
|
||||||
|
username := entry.GetAttributeValue("uid")
|
||||||
|
if username == "" {
|
||||||
|
return errors.New("no attribute 'uid'")
|
||||||
|
}
|
||||||
|
|
||||||
|
_, ok := users[username]
|
||||||
|
if !ok {
|
||||||
|
users[username] = IN_LDAP
|
||||||
|
newnames[username] = entry.GetAttributeValue("gecos")
|
||||||
|
} else {
|
||||||
|
users[username] = IN_BOTH
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for username, where := range users {
|
||||||
|
if where == IN_DB {
|
||||||
|
fmt.Printf("ldap-sync: remove '%s' (does not show up in LDAP anymore)\n", username)
|
||||||
|
if _, err := db.Exec(`DELETE FROM user WHERE user.username = ?`, username); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else if where == IN_LDAP {
|
||||||
|
name := newnames[username]
|
||||||
|
fmt.Printf("ldap-sync: add '%s' (name: '%s', roles: [], ldap: true)\n", username, name)
|
||||||
|
if _, err := db.Exec(`INSERT INTO user (username, ldap, name, roles) VALUES (?, ?, ?, ?)`,
|
||||||
|
username, 1, name, "[]"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
150
config/config.go
150
config/config.go
@ -3,82 +3,160 @@ package config
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||||
|
"github.com/jmoiron/sqlx"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var db *sqlx.DB
|
||||||
var lock sync.RWMutex
|
var lock sync.RWMutex
|
||||||
var config map[string]interface{}
|
var uiDefaults map[string]interface{}
|
||||||
|
|
||||||
var Clusters []*model.Cluster
|
var Clusters []*model.Cluster
|
||||||
|
|
||||||
const configFilePath string = "./var/ui.config.json"
|
func Init(usersdb *sqlx.DB, authEnabled bool, uiConfig map[string]interface{}, jobArchive string) error {
|
||||||
|
db = usersdb
|
||||||
func init() {
|
uiDefaults = uiConfig
|
||||||
lock.Lock()
|
entries, err := os.ReadDir(jobArchive)
|
||||||
defer lock.Unlock()
|
|
||||||
|
|
||||||
bytes, err := os.ReadFile(configFilePath)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := json.Unmarshal(bytes, &config); err != nil {
|
Clusters = []*model.Cluster{}
|
||||||
log.Fatal(err)
|
for _, de := range entries {
|
||||||
|
bytes, err := os.ReadFile(filepath.Join(jobArchive, de.Name(), "cluster.json"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var cluster model.Cluster
|
||||||
|
if err := json.Unmarshal(bytes, &cluster); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if cluster.FilterRanges.StartTime.To.IsZero() {
|
||||||
|
cluster.FilterRanges.StartTime.To = time.Unix(0, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cluster.Name != de.Name() {
|
||||||
|
return fmt.Errorf("the file '%s/cluster.json' contains the clusterId '%s'", de.Name(), cluster.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
Clusters = append(Clusters, &cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
if authEnabled {
|
||||||
|
_, err := db.Exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS configuration (
|
||||||
|
username varchar(255),
|
||||||
|
key varchar(255),
|
||||||
|
value varchar(255),
|
||||||
|
PRIMARY KEY (username, key),
|
||||||
|
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Call this function to change the current configuration.
|
// Return the personalised UI config for the currently authenticated
|
||||||
// `value` must be valid JSON. This This function is thread-safe.
|
// user or return the plain default config.
|
||||||
func UpdateConfig(key, value string, ctx context.Context) error {
|
func GetUIConfig(r *http.Request) (map[string]interface{}, error) {
|
||||||
var v interface{}
|
lock.RLock()
|
||||||
if err := json.Unmarshal([]byte(value), &v); err != nil {
|
config := make(map[string]interface{}, len(uiDefaults))
|
||||||
return err
|
for k, v := range uiDefaults {
|
||||||
|
config[k] = v
|
||||||
|
}
|
||||||
|
lock.RUnlock()
|
||||||
|
|
||||||
|
user := auth.GetUser(r.Context())
|
||||||
|
if user == nil {
|
||||||
|
return config, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
lock.Lock()
|
rows, err := db.Query(`SELECT key, value FROM configuration WHERE configuration.username = ?`, user.Username)
|
||||||
defer lock.Unlock()
|
|
||||||
|
|
||||||
config[key] = v
|
|
||||||
bytes, err := json.Marshal(config)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for rows.Next() {
|
||||||
|
var key, rawval string
|
||||||
|
if err := rows.Scan(&key, &rawval); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var val interface{}
|
||||||
|
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
config[key] = val
|
||||||
|
}
|
||||||
|
|
||||||
|
return config, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the context does not have a user, update the global ui configuration without persisting it!
|
||||||
|
// If there is a (authenticated) user, update only his configuration.
|
||||||
|
func UpdateConfig(key, value string, ctx context.Context) error {
|
||||||
|
user := auth.GetUser(ctx)
|
||||||
|
if user == nil {
|
||||||
|
lock.RLock()
|
||||||
|
defer lock.RUnlock()
|
||||||
|
|
||||||
|
var val interface{}
|
||||||
|
if err := json.Unmarshal([]byte(value), &val); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.WriteFile(configFilePath, bytes, 0644); err != nil {
|
uiDefaults[key] = val
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := db.Exec(`REPLACE INTO configuration (username, key, value) VALUES (?, ?, ?)`,
|
||||||
|
user.Username, key, value); err != nil {
|
||||||
|
log.Printf("db.Exec: %s\n", err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// http.HandlerFunc compatible function that serves the current configuration as JSON
|
|
||||||
func ServeConfig(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
lock.RLock()
|
|
||||||
defer lock.RUnlock()
|
|
||||||
|
|
||||||
rw.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(rw).Encode(config); err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetClusterConfig(cluster string) *model.Cluster {
|
func GetClusterConfig(cluster string) *model.Cluster {
|
||||||
for _, c := range Clusters {
|
for _, c := range Clusters {
|
||||||
if c.ClusterID == cluster {
|
if c.Name == cluster {
|
||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetPartition(cluster, partition string) *model.Partition {
|
||||||
|
for _, c := range Clusters {
|
||||||
|
if c.Name == cluster {
|
||||||
|
for _, p := range c.Partitions {
|
||||||
|
if p.Name == partition {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func GetMetricConfig(cluster, metric string) *model.MetricConfig {
|
func GetMetricConfig(cluster, metric string) *model.MetricConfig {
|
||||||
for _, c := range Clusters {
|
for _, c := range Clusters {
|
||||||
if c.ClusterID == cluster {
|
if c.Name == cluster {
|
||||||
for _, m := range c.MetricConfig {
|
for _, m := range c.MetricConfig {
|
||||||
if m.Name == metric {
|
if m.Name == metric {
|
||||||
return m
|
return m
|
||||||
|
2
frontend
2
frontend
@ -1 +1 @@
|
|||||||
Subproject commit b487af3496b46942d9848337bc2821575a1390b2
|
Subproject commit cc48461a810dbd3565000150fc99332743de92ba
|
8
go.mod
8
go.mod
@ -5,9 +5,15 @@ go 1.15
|
|||||||
require (
|
require (
|
||||||
github.com/99designs/gqlgen v0.13.0
|
github.com/99designs/gqlgen v0.13.0
|
||||||
github.com/Masterminds/squirrel v1.5.1
|
github.com/Masterminds/squirrel v1.5.1
|
||||||
|
github.com/go-ldap/ldap/v3 v3.4.1
|
||||||
|
github.com/golang-jwt/jwt/v4 v4.1.0
|
||||||
github.com/gorilla/handlers v1.5.1
|
github.com/gorilla/handlers v1.5.1
|
||||||
github.com/gorilla/mux v1.6.1
|
github.com/gorilla/mux v1.8.0
|
||||||
|
github.com/gorilla/sessions v1.2.1
|
||||||
github.com/jmoiron/sqlx v1.3.1
|
github.com/jmoiron/sqlx v1.3.1
|
||||||
github.com/mattn/go-sqlite3 v1.14.6
|
github.com/mattn/go-sqlite3 v1.14.6
|
||||||
|
github.com/stretchr/testify v1.5.1 // indirect
|
||||||
github.com/vektah/gqlparser/v2 v2.1.0
|
github.com/vektah/gqlparser/v2 v2.1.0
|
||||||
|
golang.org/x/crypto v0.0.0-20211117183948-ae814b36b871
|
||||||
|
gopkg.in/yaml.v2 v2.3.0 // indirect
|
||||||
)
|
)
|
||||||
|
32
go.sum
32
go.sum
@ -1,5 +1,7 @@
|
|||||||
github.com/99designs/gqlgen v0.13.0 h1:haLTcUp3Vwp80xMVEg5KRNwzfUrgFdRmtBY8fuB8scA=
|
github.com/99designs/gqlgen v0.13.0 h1:haLTcUp3Vwp80xMVEg5KRNwzfUrgFdRmtBY8fuB8scA=
|
||||||
github.com/99designs/gqlgen v0.13.0/go.mod h1:NV130r6f4tpRWuAI+zsrSdooO/eWUv+Gyyoi3rEfXIk=
|
github.com/99designs/gqlgen v0.13.0/go.mod h1:NV130r6f4tpRWuAI+zsrSdooO/eWUv+Gyyoi3rEfXIk=
|
||||||
|
github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c h1:/IBSNwUN8+eKzUzbJPqhK839ygXJ82sde8x3ogr6R28=
|
||||||
|
github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
|
||||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||||
github.com/Masterminds/squirrel v1.5.1 h1:kWAKlLLJFxZG7N2E0mBMNWVp5AuUX+JUrnhFN74Eg+w=
|
github.com/Masterminds/squirrel v1.5.1 h1:kWAKlLLJFxZG7N2E0mBMNWVp5AuUX+JUrnhFN74Eg+w=
|
||||||
github.com/Masterminds/squirrel v1.5.1/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
|
github.com/Masterminds/squirrel v1.5.1/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
|
||||||
@ -19,16 +21,26 @@ github.com/dgryski/trifles v0.0.0-20190318185328-a8d75aae118c h1:TUuUh0Xgj97tLMN
|
|||||||
github.com/dgryski/trifles v0.0.0-20190318185328-a8d75aae118c/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
|
github.com/dgryski/trifles v0.0.0-20190318185328-a8d75aae118c/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
|
||||||
github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ=
|
github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ=
|
||||||
github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||||
|
github.com/go-asn1-ber/asn1-ber v1.5.1 h1:pDbRAunXzIUXfx4CB2QJFv5IuPiuoW+sWvr/Us009o8=
|
||||||
|
github.com/go-asn1-ber/asn1-ber v1.5.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
|
||||||
github.com/go-chi/chi v3.3.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
|
github.com/go-chi/chi v3.3.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
|
||||||
|
github.com/go-ldap/ldap/v3 v3.4.1 h1:fU/0xli6HY02ocbMuozHAYsaHLcnkLjvho2r5a34BUU=
|
||||||
|
github.com/go-ldap/ldap/v3 v3.4.1/go.mod h1:iYS1MdmrmceOJ1QOTnRXrIs7i3kloqtmGQjRvjKpyMg=
|
||||||
github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs=
|
github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs=
|
||||||
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
|
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
|
||||||
github.com/gogo/protobuf v1.0.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
github.com/gogo/protobuf v1.0.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||||
github.com/gorilla/context v0.0.0-20160226214623-1ea25387ff6f h1:9oNbS1z4rVpbnkHBdPZU4jo9bSmrLpII768arSyMFgk=
|
github.com/golang-jwt/jwt/v4 v4.1.0 h1:XUgk2Ex5veyVFVeLm0xhusUTQybEbexJXrvPNOKkSY0=
|
||||||
|
github.com/golang-jwt/jwt/v4 v4.1.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
|
||||||
github.com/gorilla/context v0.0.0-20160226214623-1ea25387ff6f/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
|
github.com/gorilla/context v0.0.0-20160226214623-1ea25387ff6f/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
|
||||||
github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
|
github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
|
||||||
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
|
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
|
||||||
github.com/gorilla/mux v1.6.1 h1:KOwqsTYZdeuMacU7CxjMNYEKeBvLbxW+psodrbcEa3A=
|
|
||||||
github.com/gorilla/mux v1.6.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
github.com/gorilla/mux v1.6.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
||||||
|
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
|
||||||
|
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
|
||||||
|
github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ=
|
||||||
|
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
|
||||||
|
github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7FsgI=
|
||||||
|
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
|
||||||
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
|
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
|
||||||
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||||
github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=
|
github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=
|
||||||
@ -73,8 +85,9 @@ github.com/shurcooL/vfsgen v0.0.0-20180121065927-ffb13db8def0/go.mod h1:TrYk7fJV
|
|||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||||
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
|
||||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||||
|
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
|
||||||
|
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
|
||||||
github.com/urfave/cli/v2 v2.1.1 h1:Qt8FeAtxE/vfdrLmR3rxR6JRE0RoVmbXu8+6kZtYU4k=
|
github.com/urfave/cli/v2 v2.1.1 h1:Qt8FeAtxE/vfdrLmR3rxR6JRE0RoVmbXu8+6kZtYU4k=
|
||||||
github.com/urfave/cli/v2 v2.1.1/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ=
|
github.com/urfave/cli/v2 v2.1.1/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ=
|
||||||
github.com/vektah/dataloaden v0.2.1-0.20190515034641-a19b9a6e7c9e/go.mod h1:/HUdMve7rvxZma+2ZELQeNh88+003LL7Pf/CZ089j8U=
|
github.com/vektah/dataloaden v0.2.1-0.20190515034641-a19b9a6e7c9e/go.mod h1:/HUdMve7rvxZma+2ZELQeNh88+003LL7Pf/CZ089j8U=
|
||||||
@ -82,16 +95,26 @@ github.com/vektah/gqlparser/v2 v2.1.0 h1:uiKJ+T5HMGGQM2kRKQ8Pxw8+Zq9qhhZhz/lieYv
|
|||||||
github.com/vektah/gqlparser/v2 v2.1.0/go.mod h1:SyUiHgLATUR8BiYURfTirrTcGpcE+4XkV2se04Px1Ms=
|
github.com/vektah/gqlparser/v2 v2.1.0/go.mod h1:SyUiHgLATUR8BiYURfTirrTcGpcE+4XkV2se04Px1Ms=
|
||||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||||
|
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||||
|
golang.org/x/crypto v0.0.0-20211117183948-ae814b36b871 h1:/pEO3GD/ABYAjuakUS6xSEmmlyVS4kxBNkeA9tLJiTI=
|
||||||
|
golang.org/x/crypto v0.0.0-20211117183948-ae814b36b871/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||||
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
|
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
golang.org/x/tools v0.0.0-20190125232054-d66bd3c5d5a6/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20190125232054-d66bd3c5d5a6/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
golang.org/x/tools v0.0.0-20190515012406-7d7faa4812bd/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
golang.org/x/tools v0.0.0-20190515012406-7d7faa4812bd/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
||||||
golang.org/x/tools v0.0.0-20200114235610-7ae403b6b589 h1:rjUrONFu4kLchcZTfp3/96bR8bW8dIa8uz3cR5n0cgM=
|
golang.org/x/tools v0.0.0-20200114235610-7ae403b6b589 h1:rjUrONFu4kLchcZTfp3/96bR8bW8dIa8uz3cR5n0cgM=
|
||||||
@ -101,7 +124,8 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
|
|||||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
|
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
|
||||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
|
|
||||||
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
|
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
|
||||||
|
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
sourcegraph.com/sourcegraph/appdash v0.0.0-20180110180208-2cc67fd64755/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU=
|
sourcegraph.com/sourcegraph/appdash v0.0.0-20180110180208-2cc67fd64755/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU=
|
||||||
sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67/go.mod h1:L5q+DGLGOQFpo1snNEkLOJT2d1YTW66rWNzatr3He1k=
|
sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67/go.mod h1:L5q+DGLGOQFpo1snNEkLOJT2d1YTW66rWNzatr3He1k=
|
||||||
|
22
gqlgen.yml
22
gqlgen.yml
@ -55,17 +55,19 @@ models:
|
|||||||
- github.com/99designs/gqlgen/graphql.Int64
|
- github.com/99designs/gqlgen/graphql.Int64
|
||||||
- github.com/99designs/gqlgen/graphql.Int32
|
- github.com/99designs/gqlgen/graphql.Int32
|
||||||
Job:
|
Job:
|
||||||
|
model: "github.com/ClusterCockpit/cc-jobarchive/schema.Job"
|
||||||
fields:
|
fields:
|
||||||
tags:
|
tags:
|
||||||
resolver: true
|
resolver: true
|
||||||
JobMetric:
|
NullableFloat: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.Float" }
|
||||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.JobMetric"
|
MetricScope: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricScope" }
|
||||||
JobMetricSeries:
|
JobStatistics: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.JobStatistics" }
|
||||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricSeries"
|
Tag: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.Tag" }
|
||||||
JobMetricStatistics:
|
Resource: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.Resource" }
|
||||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricStatistics"
|
JobState: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.JobState" }
|
||||||
NullableFloat:
|
JobMetric: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.JobMetric" }
|
||||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.Float"
|
Series: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.Series" }
|
||||||
JobMetricScope:
|
MetricStatistics: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricStatistics" }
|
||||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricScope"
|
StatsSeries: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.StatsSeries" }
|
||||||
|
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,9 +1,17 @@
|
|||||||
package model
|
package model
|
||||||
|
|
||||||
// Go look at `gqlgen.yml` and the schema package for other non-generated models.
|
type Cluster struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
MetricConfig []*MetricConfig `json:"metricConfig"`
|
||||||
|
FilterRanges *FilterRanges `json:"filterRanges"`
|
||||||
|
Partitions []*Partition `json:"partitions"`
|
||||||
|
|
||||||
type JobTag struct {
|
// NOT part of the API:
|
||||||
ID string `json:"id" db:"id"`
|
MetricDataRepository *MetricDataRepository `json:"metricDataRepository"`
|
||||||
TagType string `json:"tagType" db:"tag_type"`
|
}
|
||||||
TagName string `json:"tagName" db:"tag_name"`
|
|
||||||
|
type MetricDataRepository struct {
|
||||||
|
Kind string `json:"kind"`
|
||||||
|
Url string `json:"url"`
|
||||||
|
Token string `json:"token"`
|
||||||
}
|
}
|
||||||
|
@ -11,17 +11,10 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Cluster struct {
|
type Accelerator struct {
|
||||||
ClusterID string `json:"clusterID"`
|
ID string `json:"id"`
|
||||||
ProcessorType string `json:"processorType"`
|
Type string `json:"type"`
|
||||||
SocketsPerNode int `json:"socketsPerNode"`
|
Model string `json:"model"`
|
||||||
CoresPerSocket int `json:"coresPerSocket"`
|
|
||||||
ThreadsPerCore int `json:"threadsPerCore"`
|
|
||||||
FlopRateScalar int `json:"flopRateScalar"`
|
|
||||||
FlopRateSimd int `json:"flopRateSimd"`
|
|
||||||
MemoryBandwidth int `json:"memoryBandwidth"`
|
|
||||||
MetricConfig []*MetricConfig `json:"metricConfig"`
|
|
||||||
FilterRanges *FilterRanges `json:"filterRanges"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type FilterRanges struct {
|
type FilterRanges struct {
|
||||||
@ -50,37 +43,16 @@ type IntRangeOutput struct {
|
|||||||
To int `json:"to"`
|
To int `json:"to"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Job struct {
|
|
||||||
ID string `json:"id"`
|
|
||||||
JobID string `json:"jobId"`
|
|
||||||
UserID string `json:"userId"`
|
|
||||||
ProjectID string `json:"projectId"`
|
|
||||||
ClusterID string `json:"clusterId"`
|
|
||||||
StartTime time.Time `json:"startTime"`
|
|
||||||
Duration int `json:"duration"`
|
|
||||||
NumNodes int `json:"numNodes"`
|
|
||||||
Nodes []string `json:"nodes"`
|
|
||||||
HasProfile bool `json:"hasProfile"`
|
|
||||||
State JobState `json:"state"`
|
|
||||||
Tags []*JobTag `json:"tags"`
|
|
||||||
LoadAvg *float64 `json:"loadAvg"`
|
|
||||||
MemUsedMax *float64 `json:"memUsedMax"`
|
|
||||||
FlopsAnyAvg *float64 `json:"flopsAnyAvg"`
|
|
||||||
MemBwAvg *float64 `json:"memBwAvg"`
|
|
||||||
NetBwAvg *float64 `json:"netBwAvg"`
|
|
||||||
FileBwAvg *float64 `json:"fileBwAvg"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type JobFilter struct {
|
type JobFilter struct {
|
||||||
Tags []string `json:"tags"`
|
Tags []string `json:"tags"`
|
||||||
JobID *StringInput `json:"jobId"`
|
JobID *StringInput `json:"jobId"`
|
||||||
UserID *StringInput `json:"userId"`
|
User *StringInput `json:"user"`
|
||||||
ProjectID *StringInput `json:"projectId"`
|
Project *StringInput `json:"project"`
|
||||||
ClusterID *StringInput `json:"clusterId"`
|
Cluster *StringInput `json:"cluster"`
|
||||||
Duration *IntRange `json:"duration"`
|
Duration *IntRange `json:"duration"`
|
||||||
NumNodes *IntRange `json:"numNodes"`
|
NumNodes *IntRange `json:"numNodes"`
|
||||||
StartTime *TimeRange `json:"startTime"`
|
StartTime *TimeRange `json:"startTime"`
|
||||||
IsRunning *bool `json:"isRunning"`
|
State []schema.JobState `json:"state"`
|
||||||
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg"`
|
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg"`
|
||||||
MemBwAvg *FloatRange `json:"memBwAvg"`
|
MemBwAvg *FloatRange `json:"memBwAvg"`
|
||||||
LoadAvg *FloatRange `json:"loadAvg"`
|
LoadAvg *FloatRange `json:"loadAvg"`
|
||||||
@ -93,7 +65,7 @@ type JobMetricWithName struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type JobResultList struct {
|
type JobResultList struct {
|
||||||
Items []*Job `json:"items"`
|
Items []*schema.Job `json:"items"`
|
||||||
Offset *int `json:"offset"`
|
Offset *int `json:"offset"`
|
||||||
Limit *int `json:"limit"`
|
Limit *int `json:"limit"`
|
||||||
Count *int `json:"count"`
|
Count *int `json:"count"`
|
||||||
@ -112,11 +84,12 @@ type JobsStatistics struct {
|
|||||||
type MetricConfig struct {
|
type MetricConfig struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Unit string `json:"unit"`
|
Unit string `json:"unit"`
|
||||||
Sampletime int `json:"sampletime"`
|
Scope schema.MetricScope `json:"scope"`
|
||||||
Peak int `json:"peak"`
|
Timestep int `json:"timestep"`
|
||||||
Normal int `json:"normal"`
|
Peak float64 `json:"peak"`
|
||||||
Caution int `json:"caution"`
|
Normal float64 `json:"normal"`
|
||||||
Alert int `json:"alert"`
|
Caution float64 `json:"caution"`
|
||||||
|
Alert float64 `json:"alert"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricFootprints struct {
|
type MetricFootprints struct {
|
||||||
@ -124,6 +97,16 @@ type MetricFootprints struct {
|
|||||||
Footprints []schema.Float `json:"footprints"`
|
Footprints []schema.Float `json:"footprints"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NodeMetric struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Data []schema.Float `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeMetrics struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Metrics []*NodeMetric `json:"metrics"`
|
||||||
|
}
|
||||||
|
|
||||||
type OrderByInput struct {
|
type OrderByInput struct {
|
||||||
Field string `json:"field"`
|
Field string `json:"field"`
|
||||||
Order SortDirectionEnum `json:"order"`
|
Order SortDirectionEnum `json:"order"`
|
||||||
@ -134,6 +117,18 @@ type PageRequest struct {
|
|||||||
Page int `json:"page"`
|
Page int `json:"page"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Partition struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
ProcessorType string `json:"processorType"`
|
||||||
|
SocketsPerNode int `json:"socketsPerNode"`
|
||||||
|
CoresPerSocket int `json:"coresPerSocket"`
|
||||||
|
ThreadsPerCore int `json:"threadsPerCore"`
|
||||||
|
FlopRateScalar int `json:"flopRateScalar"`
|
||||||
|
FlopRateSimd int `json:"flopRateSimd"`
|
||||||
|
MemoryBandwidth int `json:"memoryBandwidth"`
|
||||||
|
Topology *Topology `json:"topology"`
|
||||||
|
}
|
||||||
|
|
||||||
type StringInput struct {
|
type StringInput struct {
|
||||||
Eq *string `json:"eq"`
|
Eq *string `json:"eq"`
|
||||||
Contains *string `json:"contains"`
|
Contains *string `json:"contains"`
|
||||||
@ -151,6 +146,15 @@ type TimeRangeOutput struct {
|
|||||||
To time.Time `json:"to"`
|
To time.Time `json:"to"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Topology struct {
|
||||||
|
Node []int `json:"node"`
|
||||||
|
Socket [][]int `json:"socket"`
|
||||||
|
MemoryDomain [][]int `json:"memoryDomain"`
|
||||||
|
Die [][]int `json:"die"`
|
||||||
|
Core [][]int `json:"core"`
|
||||||
|
Accelerators []*Accelerator `json:"accelerators"`
|
||||||
|
}
|
||||||
|
|
||||||
type Aggregate string
|
type Aggregate string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -194,47 +198,6 @@ func (e Aggregate) MarshalGQL(w io.Writer) {
|
|||||||
fmt.Fprint(w, strconv.Quote(e.String()))
|
fmt.Fprint(w, strconv.Quote(e.String()))
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobState string
|
|
||||||
|
|
||||||
const (
|
|
||||||
JobStateRunning JobState = "running"
|
|
||||||
JobStateCompleted JobState = "completed"
|
|
||||||
)
|
|
||||||
|
|
||||||
var AllJobState = []JobState{
|
|
||||||
JobStateRunning,
|
|
||||||
JobStateCompleted,
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e JobState) IsValid() bool {
|
|
||||||
switch e {
|
|
||||||
case JobStateRunning, JobStateCompleted:
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e JobState) String() string {
|
|
||||||
return string(e)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *JobState) UnmarshalGQL(v interface{}) error {
|
|
||||||
str, ok := v.(string)
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("enums must be strings")
|
|
||||||
}
|
|
||||||
|
|
||||||
*e = JobState(str)
|
|
||||||
if !e.IsValid() {
|
|
||||||
return fmt.Errorf("%s is not a valid JobState", str)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e JobState) MarshalGQL(w io.Writer) {
|
|
||||||
fmt.Fprint(w, strconv.Quote(e.String()))
|
|
||||||
}
|
|
||||||
|
|
||||||
type SortDirectionEnum string
|
type SortDirectionEnum string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -1,12 +1,15 @@
|
|||||||
package graph
|
package graph
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
sq "github.com/Masterminds/squirrel"
|
sq "github.com/Masterminds/squirrel"
|
||||||
"github.com/jmoiron/sqlx"
|
"github.com/jmoiron/sqlx"
|
||||||
)
|
)
|
||||||
@ -19,31 +22,10 @@ type Resolver struct {
|
|||||||
DB *sqlx.DB
|
DB *sqlx.DB
|
||||||
}
|
}
|
||||||
|
|
||||||
var JobTableCols []string = []string{"id", "job_id", "user_id", "project_id", "cluster_id", "start_time", "duration", "job_state", "num_nodes", "node_list", "flops_any_avg", "mem_bw_avg", "net_bw_avg", "file_bw_avg", "load_avg"}
|
|
||||||
|
|
||||||
type Scannable interface {
|
|
||||||
Scan(dest ...interface{}) error
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper function for scanning jobs with the `JobTableCols` columns selected.
|
|
||||||
func ScanJob(row Scannable) (*model.Job, error) {
|
|
||||||
job := &model.Job{HasProfile: true}
|
|
||||||
|
|
||||||
var nodeList string
|
|
||||||
if err := row.Scan(
|
|
||||||
&job.ID, &job.JobID, &job.UserID, &job.ProjectID, &job.ClusterID,
|
|
||||||
&job.StartTime, &job.Duration, &job.State, &job.NumNodes, &nodeList,
|
|
||||||
&job.FlopsAnyAvg, &job.MemBwAvg, &job.NetBwAvg, &job.FileBwAvg, &job.LoadAvg); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
job.Nodes = strings.Split(nodeList, ",")
|
|
||||||
return job, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper function for the `jobs` GraphQL-Query. Is also used elsewhere when a list of jobs is needed.
|
// Helper function for the `jobs` GraphQL-Query. Is also used elsewhere when a list of jobs is needed.
|
||||||
func (r *Resolver) queryJobs(filters []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) ([]*model.Job, int, error) {
|
func (r *Resolver) queryJobs(ctx context.Context, filters []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) ([]*schema.Job, int, error) {
|
||||||
query := sq.Select(JobTableCols...).From("job")
|
query := sq.Select(schema.JobColumns...).From("job")
|
||||||
|
query = securityCheck(ctx, query)
|
||||||
|
|
||||||
if order != nil {
|
if order != nil {
|
||||||
field := toSnakeCase(order.Field)
|
field := toSnakeCase(order.Field)
|
||||||
@ -67,55 +49,68 @@ func (r *Resolver) queryJobs(filters []*model.JobFilter, page *model.PageRequest
|
|||||||
query = buildWhereClause(f, query)
|
query = buildWhereClause(f, query)
|
||||||
}
|
}
|
||||||
|
|
||||||
rows, err := query.RunWith(r.DB).Query()
|
sql, args, err := query.ToSql()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, 0, err
|
return nil, 0, err
|
||||||
}
|
}
|
||||||
defer rows.Close()
|
|
||||||
|
|
||||||
jobs := make([]*model.Job, 0, 50)
|
rows, err := r.DB.Queryx(sql, args...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs := make([]*schema.Job, 0, 50)
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
job, err := ScanJob(rows)
|
job, err := schema.ScanJob(rows)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, 0, err
|
return nil, 0, err
|
||||||
}
|
}
|
||||||
jobs = append(jobs, job)
|
jobs = append(jobs, job)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// count all jobs:
|
||||||
query = sq.Select("count(*)").From("job")
|
query = sq.Select("count(*)").From("job")
|
||||||
for _, f := range filters {
|
for _, f := range filters {
|
||||||
query = buildWhereClause(f, query)
|
query = buildWhereClause(f, query)
|
||||||
}
|
}
|
||||||
rows, err = query.RunWith(r.DB).Query()
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
defer rows.Close()
|
|
||||||
var count int
|
var count int
|
||||||
rows.Next()
|
if err := query.RunWith(r.DB).Scan(&count); err != nil {
|
||||||
if err := rows.Scan(&count); err != nil {
|
|
||||||
return nil, 0, err
|
return nil, 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return jobs, count, nil
|
return jobs, count, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build a sq.SelectBuilder out of a model.JobFilter.
|
func securityCheck(ctx context.Context, query sq.SelectBuilder) sq.SelectBuilder {
|
||||||
|
val := ctx.Value(auth.ContextUserKey)
|
||||||
|
if val == nil {
|
||||||
|
return query
|
||||||
|
}
|
||||||
|
|
||||||
|
user := val.(*auth.User)
|
||||||
|
if user.IsAdmin {
|
||||||
|
return query
|
||||||
|
}
|
||||||
|
|
||||||
|
return query.Where("job.user_id = ?", user.Username)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build a sq.SelectBuilder out of a model.JobFilter.
|
||||||
func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
|
func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
|
||||||
if filter.Tags != nil {
|
if filter.Tags != nil {
|
||||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where("jobtag.tag_id IN ?", filter.Tags)
|
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags})
|
||||||
}
|
}
|
||||||
if filter.JobID != nil {
|
if filter.JobID != nil {
|
||||||
query = buildStringCondition("job.job_id", filter.JobID, query)
|
query = buildStringCondition("job.job_id", filter.JobID, query)
|
||||||
}
|
}
|
||||||
if filter.UserID != nil {
|
if filter.User != nil {
|
||||||
query = buildStringCondition("job.user_id", filter.UserID, query)
|
query = buildStringCondition("job.user", filter.User, query)
|
||||||
}
|
}
|
||||||
if filter.ProjectID != nil {
|
if filter.Project != nil {
|
||||||
query = buildStringCondition("job.project_id", filter.ProjectID, query)
|
query = buildStringCondition("job.project", filter.Project, query)
|
||||||
}
|
}
|
||||||
if filter.ClusterID != nil {
|
if filter.Cluster != nil {
|
||||||
query = buildStringCondition("job.cluster_id", filter.ClusterID, query)
|
query = buildStringCondition("job.cluster", filter.Cluster, query)
|
||||||
}
|
}
|
||||||
if filter.StartTime != nil {
|
if filter.StartTime != nil {
|
||||||
query = buildTimeCondition("job.start_time", filter.StartTime, query)
|
query = buildTimeCondition("job.start_time", filter.StartTime, query)
|
||||||
@ -123,12 +118,13 @@ func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
|||||||
if filter.Duration != nil {
|
if filter.Duration != nil {
|
||||||
query = buildIntCondition("job.duration", filter.Duration, query)
|
query = buildIntCondition("job.duration", filter.Duration, query)
|
||||||
}
|
}
|
||||||
if filter.IsRunning != nil {
|
if filter.State != nil {
|
||||||
if *filter.IsRunning {
|
states := make([]string, len(filter.State))
|
||||||
query = query.Where("job.job_state = 'running'")
|
for i, val := range filter.State {
|
||||||
} else {
|
states[i] = string(val)
|
||||||
query = query.Where("job.job_state = 'completed'")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
query = query.Where(sq.Eq{"job.job_state": states})
|
||||||
}
|
}
|
||||||
if filter.NumNodes != nil {
|
if filter.NumNodes != nil {
|
||||||
query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
|
query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
|
||||||
@ -173,20 +169,23 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
|
|||||||
return query.Where(field+" = ?", *cond.Eq)
|
return query.Where(field+" = ?", *cond.Eq)
|
||||||
}
|
}
|
||||||
if cond.StartsWith != nil {
|
if cond.StartsWith != nil {
|
||||||
return query.Where(field+"LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
|
return query.Where(field+" LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
|
||||||
}
|
}
|
||||||
if cond.EndsWith != nil {
|
if cond.EndsWith != nil {
|
||||||
return query.Where(field+"LIKE ?", fmt.Sprint("%", *cond.StartsWith))
|
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.EndsWith))
|
||||||
}
|
}
|
||||||
if cond.Contains != nil {
|
if cond.Contains != nil {
|
||||||
return query.Where(field+"LIKE ?", fmt.Sprint("%", *cond.StartsWith, "%"))
|
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
|
||||||
}
|
}
|
||||||
return query
|
return query
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
|
||||||
|
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||||
|
|
||||||
func toSnakeCase(str string) string {
|
func toSnakeCase(str string) string {
|
||||||
matchFirstCap := regexp.MustCompile("(.)([A-Z][a-z]+)")
|
str = strings.ReplaceAll(str, "'", "")
|
||||||
matchAllCap := regexp.MustCompile("([a-z0-9])([A-Z])")
|
str = strings.ReplaceAll(str, "\\", "")
|
||||||
snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
|
snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
|
||||||
snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
|
snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
|
||||||
return strings.ToLower(snake)
|
return strings.ToLower(snake)
|
||||||
|
@ -1,40 +1,38 @@
|
|||||||
|
scalar Time
|
||||||
|
scalar NullableFloat
|
||||||
|
scalar MetricScope
|
||||||
|
scalar JobState
|
||||||
|
|
||||||
type Job {
|
type Job {
|
||||||
id: ID! # Database ID, unique
|
id: ID!
|
||||||
jobId: String! # ID given to the job by the cluster scheduler
|
jobId: Int!
|
||||||
userId: String! # Username
|
user: String!
|
||||||
projectId: String! # Project
|
project: String!
|
||||||
clusterId: String! # Name of the cluster this job was running on
|
cluster: String!
|
||||||
startTime: Time! # RFC3339 formatted string
|
startTime: Time!
|
||||||
duration: Int! # For running jobs, the time it has already run
|
duration: Int!
|
||||||
numNodes: Int! # Number of nodes this job was running on
|
numNodes: Int!
|
||||||
nodes: [String!]! # List of hostnames
|
numHWThreads: Int!
|
||||||
hasProfile: Boolean! # TODO: Could be removed?
|
numAcc: Int!
|
||||||
state: JobState! # State of the job
|
SMT: Int!
|
||||||
tags: [JobTag!]! # List of tags this job has
|
exclusive: Int!
|
||||||
|
partition: String!
|
||||||
# Will be null for running jobs.
|
arrayJobId: Int!
|
||||||
loadAvg: Float
|
monitoringStatus: Int!
|
||||||
memUsedMax: Float
|
state: JobState!
|
||||||
flopsAnyAvg: Float
|
tags: [Tag!]!
|
||||||
memBwAvg: Float
|
resources: [Resource!]!
|
||||||
netBwAvg: Float
|
|
||||||
fileBwAvg: Float
|
|
||||||
}
|
|
||||||
|
|
||||||
# TODO: Extend by more possible states?
|
|
||||||
enum JobState {
|
|
||||||
running
|
|
||||||
completed
|
|
||||||
}
|
|
||||||
|
|
||||||
type JobTag {
|
|
||||||
id: ID! # Database ID, unique
|
|
||||||
tagType: String! # Type
|
|
||||||
tagName: String! # Name
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type Cluster {
|
type Cluster {
|
||||||
clusterID: String!
|
name: String!
|
||||||
|
metricConfig: [MetricConfig!]!
|
||||||
|
filterRanges: FilterRanges!
|
||||||
|
partitions: [Partition!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type Partition {
|
||||||
|
name: String!
|
||||||
processorType: String!
|
processorType: String!
|
||||||
socketsPerNode: Int!
|
socketsPerNode: Int!
|
||||||
coresPerSocket: Int!
|
coresPerSocket: Int!
|
||||||
@ -42,37 +40,46 @@ type Cluster {
|
|||||||
flopRateScalar: Int!
|
flopRateScalar: Int!
|
||||||
flopRateSimd: Int!
|
flopRateSimd: Int!
|
||||||
memoryBandwidth: Int!
|
memoryBandwidth: Int!
|
||||||
metricConfig: [MetricConfig!]!
|
topology: Topology!
|
||||||
filterRanges: FilterRanges!
|
}
|
||||||
|
|
||||||
|
type Topology {
|
||||||
|
node: [Int!]
|
||||||
|
socket: [[Int!]!]
|
||||||
|
memoryDomain: [[Int!]!]
|
||||||
|
die: [[Int!]!]
|
||||||
|
core: [[Int!]!]
|
||||||
|
accelerators: [Accelerator!]
|
||||||
|
}
|
||||||
|
|
||||||
|
type Accelerator {
|
||||||
|
id: String!
|
||||||
|
type: String!
|
||||||
|
model: String!
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricConfig {
|
type MetricConfig {
|
||||||
name: String!
|
name: String!
|
||||||
unit: String!
|
unit: String!
|
||||||
sampletime: Int!
|
scope: MetricScope!
|
||||||
peak: Int!
|
|
||||||
normal: Int!
|
|
||||||
caution: Int!
|
|
||||||
alert: Int!
|
|
||||||
}
|
|
||||||
|
|
||||||
type JobMetric {
|
|
||||||
unit: String!
|
|
||||||
scope: JobMetricScope!
|
|
||||||
timestep: Int!
|
timestep: Int!
|
||||||
series: [JobMetricSeries!]!
|
peak: Float!
|
||||||
|
normal: Float!
|
||||||
|
caution: Float!
|
||||||
|
alert: Float!
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobMetricSeries {
|
type Tag {
|
||||||
node_id: String!
|
id: ID!
|
||||||
statistics: JobMetricStatistics
|
type: String!
|
||||||
data: [NullableFloat!]!
|
name: String!
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobMetricStatistics {
|
type Resource {
|
||||||
avg: Float!
|
hostname: String!
|
||||||
min: Float!
|
hwthreads: [Int!]
|
||||||
max: Float!
|
accelerators: [Int!]
|
||||||
|
configuration: String
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobMetricWithName {
|
type JobMetricWithName {
|
||||||
@ -80,6 +87,33 @@ type JobMetricWithName {
|
|||||||
metric: JobMetric!
|
metric: JobMetric!
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type JobMetric {
|
||||||
|
unit: String!
|
||||||
|
scope: MetricScope!
|
||||||
|
timestep: Int!
|
||||||
|
series: [Series!]
|
||||||
|
statisticsSeries: StatsSeries
|
||||||
|
}
|
||||||
|
|
||||||
|
type Series {
|
||||||
|
hostname: String!
|
||||||
|
id: Int
|
||||||
|
statistics: MetricStatistics
|
||||||
|
data: [NullableFloat!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type MetricStatistics {
|
||||||
|
avg: NullableFloat!
|
||||||
|
min: NullableFloat!
|
||||||
|
max: NullableFloat!
|
||||||
|
}
|
||||||
|
|
||||||
|
type StatsSeries {
|
||||||
|
mean: [NullableFloat!]!
|
||||||
|
min: [NullableFloat!]!
|
||||||
|
max: [NullableFloat!]!
|
||||||
|
}
|
||||||
|
|
||||||
type MetricFootprints {
|
type MetricFootprints {
|
||||||
name: String!
|
name: String!
|
||||||
footprints: [NullableFloat!]!
|
footprints: [NullableFloat!]!
|
||||||
@ -87,38 +121,43 @@ type MetricFootprints {
|
|||||||
|
|
||||||
enum Aggregate { USER, PROJECT, CLUSTER }
|
enum Aggregate { USER, PROJECT, CLUSTER }
|
||||||
|
|
||||||
|
type NodeMetric {
|
||||||
|
name: String!
|
||||||
|
data: [NullableFloat!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeMetrics {
|
||||||
|
id: String!
|
||||||
|
metrics: [NodeMetric!]!
|
||||||
|
}
|
||||||
|
|
||||||
type Query {
|
type Query {
|
||||||
clusters: [Cluster!]! # List of all clusters
|
clusters: [Cluster!]! # List of all clusters
|
||||||
tags: [JobTag!]! # List of all tags
|
tags: [Tag!]! # List of all tags
|
||||||
|
|
||||||
job(id: ID!): Job
|
job(id: ID!): Job
|
||||||
jobMetrics(id: ID!, metrics: [String!]): [JobMetricWithName!]!
|
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
||||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): [MetricFootprints]!
|
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): [MetricFootprints]!
|
||||||
|
|
||||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
||||||
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]!
|
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]!
|
||||||
|
|
||||||
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
||||||
|
|
||||||
|
nodeMetrics(cluster: ID!, nodes: [String!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Mutation {
|
type Mutation {
|
||||||
createTag(type: String!, name: String!): JobTag!
|
createTag(type: String!, name: String!): Tag!
|
||||||
deleteTag(id: ID!): ID!
|
deleteTag(id: ID!): ID!
|
||||||
addTagsToJob(job: ID!, tagIds: [ID!]!): [JobTag!]!
|
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||||
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [JobTag!]!
|
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||||
|
|
||||||
updateConfiguration(name: String!, value: String!): String
|
updateConfiguration(name: String!, value: String!): String
|
||||||
}
|
}
|
||||||
|
|
||||||
type IntRangeOutput {
|
type IntRangeOutput { from: Int!, to: Int! }
|
||||||
from: Int!
|
type TimeRangeOutput { from: Time!, to: Time! }
|
||||||
to: Int!
|
|
||||||
}
|
|
||||||
|
|
||||||
type TimeRangeOutput {
|
|
||||||
from: Time!
|
|
||||||
to: Time!
|
|
||||||
}
|
|
||||||
|
|
||||||
type FilterRanges {
|
type FilterRanges {
|
||||||
duration: IntRangeOutput!
|
duration: IntRangeOutput!
|
||||||
@ -129,13 +168,13 @@ type FilterRanges {
|
|||||||
input JobFilter {
|
input JobFilter {
|
||||||
tags: [ID!]
|
tags: [ID!]
|
||||||
jobId: StringInput
|
jobId: StringInput
|
||||||
userId: StringInput
|
user: StringInput
|
||||||
projectId: StringInput
|
project: StringInput
|
||||||
clusterId: StringInput
|
cluster: StringInput
|
||||||
duration: IntRange
|
duration: IntRange
|
||||||
numNodes: IntRange
|
numNodes: IntRange
|
||||||
startTime: TimeRange
|
startTime: TimeRange
|
||||||
isRunning: Boolean
|
state: [JobState!]
|
||||||
flopsAnyAvg: FloatRange
|
flopsAnyAvg: FloatRange
|
||||||
memBwAvg: FloatRange
|
memBwAvg: FloatRange
|
||||||
loadAvg: FloatRange
|
loadAvg: FloatRange
|
||||||
@ -159,20 +198,9 @@ input StringInput {
|
|||||||
endsWith: String
|
endsWith: String
|
||||||
}
|
}
|
||||||
|
|
||||||
input IntRange {
|
input IntRange { from: Int!, to: Int! }
|
||||||
from: Int!
|
input FloatRange { from: Float!, to: Float! }
|
||||||
to: Int!
|
input TimeRange { from: Time, to: Time }
|
||||||
}
|
|
||||||
|
|
||||||
input FloatRange {
|
|
||||||
from: Float!
|
|
||||||
to: Float!
|
|
||||||
}
|
|
||||||
|
|
||||||
input TimeRange {
|
|
||||||
from: Time
|
|
||||||
to: Time
|
|
||||||
}
|
|
||||||
|
|
||||||
type JobResultList {
|
type JobResultList {
|
||||||
items: [Job!]!
|
items: [Job!]!
|
||||||
@ -200,7 +228,3 @@ input PageRequest {
|
|||||||
itemsPerPage: Int!
|
itemsPerPage: Int!
|
||||||
page: Int!
|
page: Int!
|
||||||
}
|
}
|
||||||
|
|
||||||
scalar Time
|
|
||||||
scalar NullableFloat
|
|
||||||
scalar JobMetricScope
|
|
||||||
|
@ -5,42 +5,41 @@ package graph
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/generated"
|
"github.com/ClusterCockpit/cc-jobarchive/graph/generated"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
sq "github.com/Masterminds/squirrel"
|
sq "github.com/Masterminds/squirrel"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (r *jobResolver) Tags(ctx context.Context, obj *model.Job) ([]*model.JobTag, error) {
|
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||||
query := sq.
|
query := sq.
|
||||||
Select("tag.id", "tag.tag_type", "tag.tag_name").
|
Select("tag.id", "tag.tag_type", "tag.tag_name").
|
||||||
From("tag").
|
From("tag").
|
||||||
Join("jobtag ON jobtag.tag_id = tag.id").
|
Join("jobtag ON jobtag.tag_id = tag.id").
|
||||||
Where("jobtag.job_id = ?", obj.ID)
|
Where("jobtag.job_id = ?", obj.ID)
|
||||||
|
|
||||||
rows, err := query.RunWith(r.DB).Query()
|
sql, args, err := query.ToSql()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer rows.Close()
|
|
||||||
|
|
||||||
tags := make([]*model.JobTag, 0)
|
tags := make([]*schema.Tag, 0)
|
||||||
for rows.Next() {
|
if err := r.DB.Select(&tags, sql, args...); err != nil {
|
||||||
var tag model.JobTag
|
|
||||||
if err := rows.Scan(&tag.ID, &tag.TagType, &tag.TagName); err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
tags = append(tags, &tag)
|
|
||||||
}
|
|
||||||
|
|
||||||
return tags, nil
|
return tags, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*model.JobTag, error) {
|
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
|
||||||
res, err := r.DB.Exec("INSERT INTO tag (tag_type, tag_name) VALUES ($1, $2)", typeArg, name)
|
res, err := r.DB.Exec("INSERT INTO tag (tag_type, tag_name) VALUES ($1, $2)", typeArg, name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -51,7 +50,7 @@ func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name s
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return &model.JobTag{ID: strconv.FormatInt(id, 10), TagType: typeArg, TagName: name}, nil
|
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
|
func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
|
||||||
@ -59,7 +58,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
|
|||||||
panic(fmt.Errorf("not implemented"))
|
panic(fmt.Errorf("not implemented"))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*model.JobTag, error) {
|
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||||
jid, err := strconv.Atoi(job)
|
jid, err := strconv.Atoi(job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -76,7 +75,9 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tags, err := r.Job().Tags(ctx, &model.Job{ID: job})
|
dummyJob := schema.Job{}
|
||||||
|
dummyJob.ID = int64(jid)
|
||||||
|
tags, err := r.Job().Tags(ctx, &dummyJob)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -89,7 +90,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
|||||||
return tags, metricdata.UpdateTags(jobObj, tags)
|
return tags, metricdata.UpdateTags(jobObj, tags)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*model.JobTag, error) {
|
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||||
jid, err := strconv.Atoi(job)
|
jid, err := strconv.Atoi(job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -106,7 +107,9 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tags, err := r.Job().Tags(ctx, &model.Job{ID: job})
|
dummyJob := schema.Job{}
|
||||||
|
dummyJob.ID = int64(jid)
|
||||||
|
tags, err := r.Job().Tags(ctx, &dummyJob)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -131,47 +134,54 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*model.Cluster, error)
|
|||||||
return config.Clusters, nil
|
return config.Clusters, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *queryResolver) Tags(ctx context.Context) ([]*model.JobTag, error) {
|
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
||||||
rows, err := sq.Select("id", "tag_type", "tag_name").From("tag").RunWith(r.DB).Query()
|
sql, args, err := sq.Select("id", "tag_type", "tag_name").From("tag").ToSql()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer rows.Close()
|
|
||||||
|
|
||||||
tags := make([]*model.JobTag, 0)
|
tags := make([]*schema.Tag, 0)
|
||||||
for rows.Next() {
|
if err := r.DB.Select(&tags, sql, args...); err != nil {
|
||||||
var tag model.JobTag
|
|
||||||
if err := rows.Scan(&tag.ID, &tag.TagType, &tag.TagName); err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
tags = append(tags, &tag)
|
|
||||||
}
|
|
||||||
|
|
||||||
return tags, nil
|
return tags, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *queryResolver) Job(ctx context.Context, id string) (*model.Job, error) {
|
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
|
||||||
return ScanJob(sq.Select(JobTableCols...).From("job").Where("job.id = ?", id).RunWith(r.DB).QueryRow())
|
query := sq.Select(schema.JobColumns...).From("job").Where("job.id = ?", id)
|
||||||
|
query = securityCheck(ctx, query)
|
||||||
|
sql, args, err := query.ToSql()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return schema.ScanJob(r.DB.QueryRowx(sql, args...))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string) ([]*model.JobMetricWithName, error) {
|
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
|
||||||
job, err := r.Query().Job(ctx, id)
|
job, err := r.Query().Job(ctx, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := metricdata.LoadData(job, metrics, ctx)
|
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
res := []*model.JobMetricWithName{}
|
res := []*model.JobMetricWithName{}
|
||||||
for name, md := range data {
|
for name, md := range data {
|
||||||
|
for scope, metric := range md {
|
||||||
|
if metric.Scope != schema.MetricScope(scope) {
|
||||||
|
panic("WTF?")
|
||||||
|
}
|
||||||
|
|
||||||
res = append(res, &model.JobMetricWithName{
|
res = append(res, &model.JobMetricWithName{
|
||||||
Name: name,
|
Name: name,
|
||||||
Metric: md,
|
Metric: metric,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return res, err
|
return res, err
|
||||||
}
|
}
|
||||||
@ -181,7 +191,7 @@ func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobF
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) (*model.JobResultList, error) {
|
func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) (*model.JobResultList, error) {
|
||||||
jobs, count, err := r.queryJobs(filter, page, order)
|
jobs, count, err := r.queryJobs(ctx, filter, page, order)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -197,6 +207,36 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
|
|||||||
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
|
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
|
||||||
|
user := auth.GetUser(ctx)
|
||||||
|
if user != nil && !user.IsAdmin {
|
||||||
|
return nil, errors.New("you need to be an administrator for this query")
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, from.Unix(), to.Unix(), ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
res := make([]*model.NodeMetrics, 0, len(data))
|
||||||
|
for node, metrics := range data {
|
||||||
|
nodeMetrics := make([]*model.NodeMetric, 0, len(metrics))
|
||||||
|
for metric, data := range metrics {
|
||||||
|
nodeMetrics = append(nodeMetrics, &model.NodeMetric{
|
||||||
|
Name: metric,
|
||||||
|
Data: data,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
res = append(res, &model.NodeMetrics{
|
||||||
|
ID: node,
|
||||||
|
Metrics: nodeMetrics,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Job returns generated.JobResolver implementation.
|
// Job returns generated.JobResolver implementation.
|
||||||
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ package graph
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
|
||||||
@ -16,9 +17,9 @@ import (
|
|||||||
|
|
||||||
// GraphQL validation should make sure that no unknown values can be specified.
|
// GraphQL validation should make sure that no unknown values can be specified.
|
||||||
var groupBy2column = map[model.Aggregate]string{
|
var groupBy2column = map[model.Aggregate]string{
|
||||||
model.AggregateUser: "job.user_id",
|
model.AggregateUser: "job.user",
|
||||||
model.AggregateProject: "job.project_id",
|
model.AggregateProject: "job.project",
|
||||||
model.AggregateCluster: "job.cluster_id",
|
model.AggregateCluster: "job.cluster",
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
|
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||||
@ -28,7 +29,8 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
|
|
||||||
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
|
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
|
||||||
for _, cluster := range config.Clusters {
|
for _, cluster := range config.Clusters {
|
||||||
corehoursCol := fmt.Sprintf("SUM(job.duration * job.num_nodes * %d * %d) / 3600", cluster.SocketsPerNode, cluster.CoresPerSocket)
|
for _, partition := range cluster.Partitions {
|
||||||
|
corehoursCol := fmt.Sprintf("SUM(job.duration * job.num_nodes * %d * %d) / 3600", partition.SocketsPerNode, partition.CoresPerSocket)
|
||||||
var query sq.SelectBuilder
|
var query sq.SelectBuilder
|
||||||
if groupBy == nil {
|
if groupBy == nil {
|
||||||
query = sq.Select(
|
query = sq.Select(
|
||||||
@ -36,7 +38,7 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
"COUNT(job.id)",
|
"COUNT(job.id)",
|
||||||
"SUM(job.duration) / 3600",
|
"SUM(job.duration) / 3600",
|
||||||
corehoursCol,
|
corehoursCol,
|
||||||
).From("job").Where("job.cluster_id = ?", cluster.ClusterID)
|
).From("job")
|
||||||
} else {
|
} else {
|
||||||
col := groupBy2column[*groupBy]
|
col := groupBy2column[*groupBy]
|
||||||
query = sq.Select(
|
query = sq.Select(
|
||||||
@ -44,9 +46,14 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
"COUNT(job.id)",
|
"COUNT(job.id)",
|
||||||
"SUM(job.duration) / 3600",
|
"SUM(job.duration) / 3600",
|
||||||
corehoursCol,
|
corehoursCol,
|
||||||
).From("job").Where("job.cluster_id = ?", cluster.ClusterID).GroupBy(col)
|
).From("job").GroupBy(col)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
query = query.
|
||||||
|
Where("job.cluster = ?", cluster.Name).
|
||||||
|
Where("job.partition = ?", partition.Name)
|
||||||
|
|
||||||
|
query = securityCheck(ctx, query)
|
||||||
for _, f := range filter {
|
for _, f := range filter {
|
||||||
query = buildWhereClause(f, query)
|
query = buildWhereClause(f, query)
|
||||||
}
|
}
|
||||||
@ -79,9 +86,11 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if groupBy == nil {
|
if groupBy == nil {
|
||||||
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < 120")
|
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < 120")
|
||||||
|
query = securityCheck(ctx, query)
|
||||||
for _, f := range filter {
|
for _, f := range filter {
|
||||||
query = buildWhereClause(f, query)
|
query = buildWhereClause(f, query)
|
||||||
}
|
}
|
||||||
@ -91,6 +100,7 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
} else {
|
} else {
|
||||||
col := groupBy2column[*groupBy]
|
col := groupBy2column[*groupBy]
|
||||||
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < 120")
|
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < 120")
|
||||||
|
query = securityCheck(ctx, query)
|
||||||
for _, f := range filter {
|
for _, f := range filter {
|
||||||
query = buildWhereClause(f, query)
|
query = buildWhereClause(f, query)
|
||||||
}
|
}
|
||||||
@ -133,12 +143,12 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
|
|
||||||
if histogramsNeeded {
|
if histogramsNeeded {
|
||||||
var err error
|
var err error
|
||||||
stat.HistWalltime, err = r.jobsStatisticsHistogram("ROUND(job.duration / 3600) as value", filter, id, col)
|
stat.HistWalltime, err = r.jobsStatisticsHistogram(ctx, "ROUND(job.duration / 3600) as value", filter, id, col)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
stat.HistNumNodes, err = r.jobsStatisticsHistogram("job.num_nodes as value", filter, id, col)
|
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -150,8 +160,9 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
|
|
||||||
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
|
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
|
||||||
// to add a condition to the query of the kind "<col> = <id>".
|
// to add a condition to the query of the kind "<col> = <id>".
|
||||||
func (r *queryResolver) jobsStatisticsHistogram(value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
|
func (r *queryResolver) jobsStatisticsHistogram(ctx context.Context, value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
|
||||||
query := sq.Select(value, "COUNT(job.id) AS count").From("job")
|
query := sq.Select(value, "COUNT(job.id) AS count").From("job")
|
||||||
|
query = securityCheck(ctx, query)
|
||||||
for _, f := range filters {
|
for _, f := range filters {
|
||||||
query = buildWhereClause(f, query)
|
query = buildWhereClause(f, query)
|
||||||
}
|
}
|
||||||
@ -179,7 +190,7 @@ func (r *queryResolver) jobsStatisticsHistogram(value string, filters []*model.J
|
|||||||
|
|
||||||
// Helper function for the rooflineHeatmap GraphQL query placed here so that schema.resolvers.go is not too full.
|
// Helper function for the rooflineHeatmap GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||||
func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
||||||
jobs, count, err := r.queryJobs(filter, &model.PageRequest{Page: 1, ItemsPerPage: 501}, nil)
|
jobs, count, err := r.queryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: 501}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -195,14 +206,21 @@ func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilte
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, job := range jobs {
|
for _, job := range jobs {
|
||||||
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, ctx)
|
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
flops, membw := jobdata["flops_any"], jobdata["mem_bw"]
|
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
|
||||||
if flops == nil && membw == nil {
|
if flops_ == nil && membw_ == nil {
|
||||||
return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %s", job.ID)
|
return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
flops, ok1 := flops_["node"]
|
||||||
|
membw, ok2 := membw_["node"]
|
||||||
|
if !ok1 || !ok2 {
|
||||||
|
// TODO/FIXME:
|
||||||
|
return nil, errors.New("todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
|
||||||
}
|
}
|
||||||
|
|
||||||
for n := 0; n < len(flops.Series); n++ {
|
for n := 0; n < len(flops.Series); n++ {
|
||||||
@ -232,7 +250,7 @@ func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilte
|
|||||||
|
|
||||||
// Helper function for the jobsFootprints GraphQL query placed here so that schema.resolvers.go is not too full.
|
// Helper function for the jobsFootprints GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||||
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.MetricFootprints, error) {
|
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.MetricFootprints, error) {
|
||||||
jobs, count, err := r.queryJobs(filter, &model.PageRequest{Page: 1, ItemsPerPage: 501}, nil)
|
jobs, count, err := r.queryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: 501}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
221
init-db.go
221
init-db.go
@ -2,18 +2,66 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"database/sql"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
"github.com/jmoiron/sqlx"
|
"github.com/jmoiron/sqlx"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const JOBS_DB_SCHEMA string = `
|
||||||
|
DROP TABLE IF EXISTS job;
|
||||||
|
DROP TABLE IF EXISTS tag;
|
||||||
|
DROP TABLE IF EXISTS jobtag;
|
||||||
|
|
||||||
|
CREATE TABLE job (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT, -- Not needed in sqlite
|
||||||
|
job_id BIGINT NOT NULL,
|
||||||
|
cluster VARCHAR(255) NOT NULL,
|
||||||
|
start_time TIMESTAMP NOT NULL,
|
||||||
|
|
||||||
|
user VARCHAR(255) NOT NULL,
|
||||||
|
project VARCHAR(255) NOT NULL,
|
||||||
|
partition VARCHAR(255) NOT NULL,
|
||||||
|
array_job_id BIGINT NOT NULL,
|
||||||
|
duration INT,
|
||||||
|
job_state VARCHAR(255) CHECK(job_state IN ('running', 'completed', 'failed', 'canceled', 'stopped', 'timeout')) NOT NULL,
|
||||||
|
meta_data TEXT, -- json, but sqlite has no json type
|
||||||
|
resources TEXT NOT NULL, -- json, but sqlite has no json type
|
||||||
|
|
||||||
|
num_nodes INT NOT NULL,
|
||||||
|
num_hwthreads INT NOT NULL,
|
||||||
|
num_acc INT NOT NULL,
|
||||||
|
smt TINYINT CHECK(smt IN (0, 1 )) NOT NULL DEFAULT 1,
|
||||||
|
exclusive TINYINT CHECK(exclusive IN (0, 1, 2)) NOT NULL DEFAULT 1,
|
||||||
|
monitoring_status TINYINT CHECK(monitoring_status IN (0, 1 )) NOT NULL DEFAULT 1,
|
||||||
|
|
||||||
|
mem_used_max REAL NOT NULL DEFAULT 0.0,
|
||||||
|
flops_any_avg REAL NOT NULL DEFAULT 0.0,
|
||||||
|
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||||
|
load_avg REAL NOT NULL DEFAULT 0.0,
|
||||||
|
net_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||||
|
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
|
||||||
|
file_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||||
|
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
|
||||||
|
|
||||||
|
CREATE TABLE tag (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
tag_type VARCHAR(255) NOT NULL,
|
||||||
|
tag_name VARCHAR(255) NOT NULL);
|
||||||
|
|
||||||
|
CREATE TABLE jobtag (
|
||||||
|
job_id INTEGER,
|
||||||
|
tag_id INTEGER,
|
||||||
|
PRIMARY KEY (job_id, tag_id),
|
||||||
|
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
|
||||||
|
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
|
||||||
|
`
|
||||||
|
|
||||||
// Delete the tables "job", "tag" and "jobtag" from the database and
|
// Delete the tables "job", "tag" and "jobtag" from the database and
|
||||||
// repopulate them using the jobs found in `archive`.
|
// repopulate them using the jobs found in `archive`.
|
||||||
func initDB(db *sqlx.DB, archive string) error {
|
func initDB(db *sqlx.DB, archive string) error {
|
||||||
@ -21,99 +69,101 @@ func initDB(db *sqlx.DB, archive string) error {
|
|||||||
fmt.Println("Building database...")
|
fmt.Println("Building database...")
|
||||||
|
|
||||||
// Basic database structure:
|
// Basic database structure:
|
||||||
_, err := db.Exec(`
|
_, err := db.Exec(JOBS_DB_SCHEMA)
|
||||||
DROP TABLE IF EXISTS job;
|
|
||||||
DROP TABLE IF EXISTS tag;
|
|
||||||
DROP TABLE IF EXISTS jobtag;
|
|
||||||
|
|
||||||
CREATE TABLE job (
|
|
||||||
id INTEGER PRIMARY KEY,
|
|
||||||
job_id TEXT,
|
|
||||||
user_id TEXT,
|
|
||||||
project_id TEXT,
|
|
||||||
cluster_id TEXT,
|
|
||||||
start_time TIMESTAMP,
|
|
||||||
duration INTEGER,
|
|
||||||
job_state TEXT,
|
|
||||||
num_nodes INTEGER,
|
|
||||||
node_list TEXT,
|
|
||||||
metadata TEXT,
|
|
||||||
|
|
||||||
flops_any_avg REAL,
|
|
||||||
mem_bw_avg REAL,
|
|
||||||
net_bw_avg REAL,
|
|
||||||
file_bw_avg REAL,
|
|
||||||
load_avg REAL);
|
|
||||||
CREATE TABLE tag (
|
|
||||||
id INTEGER PRIMARY KEY,
|
|
||||||
tag_type TEXT,
|
|
||||||
tag_name TEXT);
|
|
||||||
CREATE TABLE jobtag (
|
|
||||||
job_id INTEGER,
|
|
||||||
tag_id INTEGER,
|
|
||||||
PRIMARY KEY (job_id, tag_id),
|
|
||||||
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE ON UPDATE NO ACTION,
|
|
||||||
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE ON UPDATE NO ACTION);`)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
entries0, err := os.ReadDir(archive)
|
clustersDir, err := os.ReadDir(archive)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
insertstmt, err := db.Prepare(`INSERT INTO job
|
|
||||||
(job_id, user_id, project_id, cluster_id, start_time, duration, job_state, num_nodes, node_list, metadata, flops_any_avg, mem_bw_avg, net_bw_avg, file_bw_avg, load_avg)
|
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);`)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
var tx *sql.Tx = nil
|
tx, err := db.Beginx()
|
||||||
var i int = 0
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt, err := tx.PrepareNamed(`INSERT INTO job (
|
||||||
|
job_id, user, project, cluster, partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||||
|
exclusive, monitoring_status, smt, job_state, start_time, duration, resources, meta_data,
|
||||||
|
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
|
||||||
|
) VALUES (
|
||||||
|
:job_id, :user, :project, :cluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||||
|
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :resources, :meta_data,
|
||||||
|
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
|
||||||
|
);`)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
i := 0
|
||||||
tags := make(map[string]int64)
|
tags := make(map[string]int64)
|
||||||
for _, entry0 := range entries0 {
|
handleDirectory := func(filename string) error {
|
||||||
entries1, err := os.ReadDir(filepath.Join(archive, entry0.Name()))
|
// Bundle 100 inserts into one transaction for better performance:
|
||||||
if err != nil {
|
if i%100 == 0 {
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, entry1 := range entries1 {
|
|
||||||
if !entry1.IsDir() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
entries2, err := os.ReadDir(filepath.Join(archive, entry0.Name(), entry1.Name()))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, entry2 := range entries2 {
|
|
||||||
// Bundle 200 inserts into one transaction for better performance:
|
|
||||||
if i%200 == 0 {
|
|
||||||
if tx != nil {
|
if tx != nil {
|
||||||
if err := tx.Commit(); err != nil {
|
if err := tx.Commit(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tx, err = db.Begin()
|
tx, err = db.Beginx()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
insertstmt = tx.Stmt(insertstmt)
|
stmt = tx.NamedStmt(stmt)
|
||||||
fmt.Printf("%d jobs inserted...\r", i)
|
fmt.Printf("%d jobs inserted...\r", i)
|
||||||
}
|
}
|
||||||
|
|
||||||
filename := filepath.Join(archive, entry0.Name(), entry1.Name(), entry2.Name())
|
err := loadJob(tx, stmt, tags, filename)
|
||||||
if err = loadJob(tx, insertstmt, tags, filename); err != nil {
|
if err == nil {
|
||||||
fmt.Printf("failed to load '%s': %s", filename, err.Error())
|
i += 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, clusterDir := range clustersDir {
|
||||||
|
lvl1Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name()))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, lvl1Dir := range lvl1Dirs {
|
||||||
|
if !lvl1Dir.IsDir() {
|
||||||
|
// Could be the cluster.json file
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
i += 1
|
lvl2Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name()))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, lvl2Dir := range lvl2Dirs {
|
||||||
|
dirpath := filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
|
||||||
|
startTimeDirs, err := os.ReadDir(dirpath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, startTimeDir := range startTimeDirs {
|
||||||
|
if startTimeDir.Type().IsRegular() && startTimeDir.Name() == "meta.json" {
|
||||||
|
if err := handleDirectory(dirpath); err != nil {
|
||||||
|
log.Printf("in %s: %s\n", dirpath, err.Error())
|
||||||
|
}
|
||||||
|
} else if startTimeDir.IsDir() {
|
||||||
|
if err := handleDirectory(filepath.Join(dirpath, startTimeDir.Name())); err != nil {
|
||||||
|
log.Printf("in %s: %s\n", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -125,37 +175,44 @@ func initDB(db *sqlx.DB, archive string) error {
|
|||||||
// Create indexes after inserts so that they do not
|
// Create indexes after inserts so that they do not
|
||||||
// need to be continually updated.
|
// need to be continually updated.
|
||||||
if _, err := db.Exec(`
|
if _, err := db.Exec(`
|
||||||
CREATE INDEX job_by_user ON job (user_id);
|
CREATE INDEX job_by_user ON job (user);
|
||||||
CREATE INDEX job_by_starttime ON job (start_time);`); err != nil {
|
CREATE INDEX job_by_starttime ON job (start_time);`); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
|
log.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the `meta.json` file at `path` and insert it to the database using the prepared
|
// Read the `meta.json` file at `path` and insert it to the database using the prepared
|
||||||
// insert statement `stmt`. `tags` maps all existing tags to their database ID.
|
// insert statement `stmt`. `tags` maps all existing tags to their database ID.
|
||||||
func loadJob(tx *sql.Tx, stmt *sql.Stmt, tags map[string]int64, path string) error {
|
func loadJob(tx *sqlx.Tx, stmt *sqlx.NamedStmt, tags map[string]int64, path string) error {
|
||||||
f, err := os.Open(filepath.Join(path, "meta.json"))
|
f, err := os.Open(filepath.Join(path, "meta.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
|
||||||
var job schema.JobMeta
|
var jobMeta schema.JobMeta = schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||||
if err := json.NewDecoder(bufio.NewReader(f)).Decode(&job); err != nil {
|
if err := json.NewDecoder(bufio.NewReader(f)).Decode(&jobMeta); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
flopsAnyAvg := loadJobStat(&job, "flops_any")
|
job := schema.Job{
|
||||||
memBwAvg := loadJobStat(&job, "mem_bw")
|
BaseJob: jobMeta.BaseJob,
|
||||||
netBwAvg := loadJobStat(&job, "net_bw")
|
StartTime: time.Unix(jobMeta.StartTime, 0),
|
||||||
fileBwAvg := loadJobStat(&job, "file_bw")
|
}
|
||||||
loadAvg := loadJobStat(&job, "load_one")
|
|
||||||
|
|
||||||
res, err := stmt.Exec(job.JobId, job.UserId, job.ProjectId, job.ClusterId, job.StartTime, job.Duration, job.JobState,
|
// TODO: Other metrics...
|
||||||
job.NumNodes, strings.Join(job.Nodes, ","), nil, flopsAnyAvg, memBwAvg, netBwAvg, fileBwAvg, loadAvg)
|
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
|
||||||
|
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
|
||||||
|
|
||||||
|
job.RawResources, err = json.Marshal(job.Resources)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := stmt.Exec(job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -188,12 +245,10 @@ func loadJob(tx *sql.Tx, stmt *sql.Stmt, tags map[string]int64, path string) err
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadJobStat(job *schema.JobMeta, metric string) sql.NullFloat64 {
|
func loadJobStat(job *schema.JobMeta, metric string) float64 {
|
||||||
val := sql.NullFloat64{Valid: false}
|
|
||||||
if stats, ok := job.Statistics[metric]; ok {
|
if stats, ok := job.Statistics[metric]; ok {
|
||||||
val.Valid = true
|
return stats.Avg
|
||||||
val.Float64 = stats.Avg
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return val
|
return 0.0
|
||||||
}
|
}
|
||||||
|
@ -11,35 +11,30 @@ import (
|
|||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
var JobArchivePath string = "./var/job-archive"
|
|
||||||
|
|
||||||
// For a given job, return the path of the `data.json`/`meta.json` file.
|
// For a given job, return the path of the `data.json`/`meta.json` file.
|
||||||
// TODO: Implement Issue ClusterCockpit/ClusterCockpit#97
|
// TODO: Implement Issue ClusterCockpit/ClusterCockpit#97
|
||||||
func getPath(job *model.Job, file string) (string, error) {
|
func getPath(job *schema.Job, file string, checkLegacy bool) (string, error) {
|
||||||
id, err := strconv.Atoi(strings.Split(job.JobID, ".")[0])
|
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
|
||||||
if err != nil {
|
if !checkLegacy {
|
||||||
return "", err
|
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
lvl1, lvl2 := fmt.Sprintf("%d", id/1000), fmt.Sprintf("%03d", id%1000)
|
legacyPath := filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, file)
|
||||||
legacyPath := filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, file)
|
|
||||||
if _, err := os.Stat(legacyPath); errors.Is(err, os.ErrNotExist) {
|
if _, err := os.Stat(legacyPath); errors.Is(err, os.ErrNotExist) {
|
||||||
return filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return legacyPath, nil
|
return legacyPath, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assuming job is completed/archived, return the job's metric data.
|
// Assuming job is completed/archived, return the job's metric data.
|
||||||
func loadFromArchive(job *model.Job) (schema.JobData, error) {
|
func loadFromArchive(job *schema.Job) (schema.JobData, error) {
|
||||||
filename, err := getPath(job, "data.json")
|
filename, err := getPath(job, "data.json", true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -60,12 +55,12 @@ func loadFromArchive(job *model.Job) (schema.JobData, error) {
|
|||||||
|
|
||||||
// If the job is archived, find its `meta.json` file and override the tags list
|
// If the job is archived, find its `meta.json` file and override the tags list
|
||||||
// in that JSON file. If the job is not archived, nothing is done.
|
// in that JSON file. If the job is not archived, nothing is done.
|
||||||
func UpdateTags(job *model.Job, tags []*model.JobTag) error {
|
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
||||||
if job.State == model.JobStateRunning {
|
if job.State == schema.JobStateRunning {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
filename, err := getPath(job, "meta.json")
|
filename, err := getPath(job, "meta.json", true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -78,23 +73,19 @@ func UpdateTags(job *model.Job, tags []*model.JobTag) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
var metaFile schema.JobMeta
|
var metaFile schema.JobMeta = schema.JobMeta{
|
||||||
|
BaseJob: schema.JobDefaults,
|
||||||
|
}
|
||||||
if err := json.NewDecoder(f).Decode(&metaFile); err != nil {
|
if err := json.NewDecoder(f).Decode(&metaFile); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
f.Close()
|
f.Close()
|
||||||
|
|
||||||
metaFile.Tags = make([]struct {
|
metaFile.Tags = make([]*schema.Tag, 0)
|
||||||
Name string "json:\"name\""
|
|
||||||
Type string "json:\"type\""
|
|
||||||
}, 0)
|
|
||||||
for _, tag := range tags {
|
for _, tag := range tags {
|
||||||
metaFile.Tags = append(metaFile.Tags, struct {
|
metaFile.Tags = append(metaFile.Tags, &schema.Tag{
|
||||||
Name string "json:\"name\""
|
Name: tag.Name,
|
||||||
Type string "json:\"type\""
|
Type: tag.Type,
|
||||||
}{
|
|
||||||
Name: tag.TagName,
|
|
||||||
Type: tag.TagType,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,8 +98,8 @@ func UpdateTags(job *model.Job, tags []*model.JobTag) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Helper to metricdata.LoadAverages().
|
// Helper to metricdata.LoadAverages().
|
||||||
func loadAveragesFromArchive(job *model.Job, metrics []string, data [][]schema.Float) error {
|
func loadAveragesFromArchive(job *schema.Job, metrics []string, data [][]schema.Float) error {
|
||||||
filename, err := getPath(job, "meta.json")
|
filename, err := getPath(job, "meta.json", true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -135,97 +126,144 @@ func loadAveragesFromArchive(job *model.Job, metrics []string, data [][]schema.F
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Writes a running job to the job-archive
|
// Writes a running job to the job-archive
|
||||||
func ArchiveJob(job *model.Job, ctx context.Context) error {
|
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||||
if job.State != model.JobStateRunning {
|
if job.State != schema.JobStateRunning {
|
||||||
return errors.New("cannot archive job that is not running")
|
return nil, errors.New("cannot archive job that is not running")
|
||||||
}
|
}
|
||||||
|
|
||||||
allMetrics := make([]string, 0)
|
allMetrics := make([]string, 0)
|
||||||
metricConfigs := config.GetClusterConfig(job.ClusterID).MetricConfig
|
metricConfigs := config.GetClusterConfig(job.Cluster).MetricConfig
|
||||||
for _, mc := range metricConfigs {
|
for _, mc := range metricConfigs {
|
||||||
allMetrics = append(allMetrics, mc.Name)
|
allMetrics = append(allMetrics, mc.Name)
|
||||||
}
|
}
|
||||||
jobData, err := LoadData(job, allMetrics, ctx)
|
|
||||||
|
// TODO: Use more granular resolution on non-exclusive jobs?
|
||||||
|
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||||
|
jobData, err := LoadData(job, allMetrics, scopes, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
tags := []struct {
|
if err := calcStatisticsSeries(job, jobData); err != nil {
|
||||||
Name string `json:"name"`
|
return nil, err
|
||||||
Type string `json:"type"`
|
|
||||||
}{}
|
|
||||||
for _, tag := range job.Tags {
|
|
||||||
tags = append(tags, struct {
|
|
||||||
Name string `json:"name"`
|
|
||||||
Type string `json:"type"`
|
|
||||||
}{
|
|
||||||
Name: tag.TagName,
|
|
||||||
Type: tag.TagType,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
metaData := &schema.JobMeta{
|
jobMeta := &schema.JobMeta{
|
||||||
JobId: job.JobID,
|
BaseJob: job.BaseJob,
|
||||||
UserId: job.UserID,
|
|
||||||
ClusterId: job.ClusterID,
|
|
||||||
NumNodes: job.NumNodes,
|
|
||||||
JobState: job.State.String(),
|
|
||||||
StartTime: job.StartTime.Unix(),
|
StartTime: job.StartTime.Unix(),
|
||||||
Duration: int64(job.Duration),
|
Statistics: make(map[string]schema.JobStatistics),
|
||||||
Nodes: job.Nodes,
|
|
||||||
Tags: tags,
|
|
||||||
Statistics: make(map[string]*schema.JobMetaStatistics),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for metric, data := range jobData {
|
for metric, data := range jobData {
|
||||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||||
for _, nodedata := range data.Series {
|
nodeData, ok := data["node"]
|
||||||
avg += nodedata.Statistics.Avg
|
if !ok {
|
||||||
min = math.Min(min, nodedata.Statistics.Min)
|
// TODO/FIXME: Calc average for non-node metrics as well!
|
||||||
max = math.Max(max, nodedata.Statistics.Max)
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
metaData.Statistics[metric] = &schema.JobMetaStatistics{
|
for _, series := range nodeData.Series {
|
||||||
Unit: config.GetMetricConfig(job.ClusterID, metric).Unit,
|
avg += series.Statistics.Avg
|
||||||
|
min = math.Min(min, series.Statistics.Min)
|
||||||
|
max = math.Max(max, series.Statistics.Max)
|
||||||
|
}
|
||||||
|
|
||||||
|
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||||
|
Unit: config.GetMetricConfig(job.Cluster, metric).Unit,
|
||||||
Avg: avg / float64(job.NumNodes),
|
Avg: avg / float64(job.NumNodes),
|
||||||
Min: min,
|
Min: min,
|
||||||
Max: max,
|
Max: max,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dirPath, err := getPath(job, "")
|
// If the file based archive is disabled,
|
||||||
|
// only return the JobMeta structure as the
|
||||||
|
// statistics in there are needed.
|
||||||
|
if !useArchive {
|
||||||
|
return jobMeta, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
dirPath, err := getPath(job, "", false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.MkdirAll(dirPath, 0777); err != nil {
|
if err := os.MkdirAll(dirPath, 0777); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err := os.Create(path.Join(dirPath, "meta.json"))
|
f, err := os.Create(path.Join(dirPath, "meta.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
writer := bufio.NewWriter(f)
|
writer := bufio.NewWriter(f)
|
||||||
if err := json.NewEncoder(writer).Encode(metaData); err != nil {
|
if err := json.NewEncoder(writer).Encode(jobMeta); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
if err := writer.Flush(); err != nil {
|
if err := writer.Flush(); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err = os.Create(path.Join(dirPath, "data.json"))
|
f, err = os.Create(path.Join(dirPath, "data.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
writer = bufio.NewWriter(f)
|
writer = bufio.NewWriter(f)
|
||||||
if err := json.NewEncoder(writer).Encode(metaData); err != nil {
|
if err := json.NewEncoder(writer).Encode(jobData); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
if err := writer.Flush(); err != nil {
|
if err := writer.Flush(); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return f.Close()
|
return jobMeta, f.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add statisticsSeries fields
|
||||||
|
func calcStatisticsSeries(job *schema.Job, jobData schema.JobData) error {
|
||||||
|
for _, scopes := range jobData {
|
||||||
|
for _, jobMetric := range scopes {
|
||||||
|
if jobMetric.StatisticsSeries != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(jobMetric.Series) < 5 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
n := 0
|
||||||
|
for _, series := range jobMetric.Series {
|
||||||
|
if len(series.Data) > n {
|
||||||
|
n = len(series.Data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mean, min, max := make([]schema.Float, n), make([]schema.Float, n), make([]schema.Float, n)
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
sum, smin, smax := schema.Float(0.), math.MaxFloat32, -math.MaxFloat32
|
||||||
|
for _, series := range jobMetric.Series {
|
||||||
|
if i >= len(series.Data) {
|
||||||
|
sum, smin, smax = schema.NaN, math.NaN(), math.NaN()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
x := series.Data[i]
|
||||||
|
sum += x
|
||||||
|
smin = math.Min(smin, float64(x))
|
||||||
|
smax = math.Max(smax, float64(x))
|
||||||
|
}
|
||||||
|
sum /= schema.Float(len(jobMetric.Series))
|
||||||
|
mean[i] = sum
|
||||||
|
min[i] = schema.Float(smin)
|
||||||
|
max[i] = schema.Float(smax)
|
||||||
|
}
|
||||||
|
|
||||||
|
jobMetric.StatisticsSeries = &schema.StatsSeries{
|
||||||
|
Min: min, Mean: mean, Max: max,
|
||||||
|
}
|
||||||
|
jobMetric.Series = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -1,17 +1,18 @@
|
|||||||
package metricdata
|
package metricdata
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -31,9 +32,9 @@ type ApiMetricData struct {
|
|||||||
From int64 `json:"from"`
|
From int64 `json:"from"`
|
||||||
To int64 `json:"to"`
|
To int64 `json:"to"`
|
||||||
Data []schema.Float `json:"data"`
|
Data []schema.Float `json:"data"`
|
||||||
Avg *float64 `json:"avg"`
|
Avg schema.Float `json:"avg"`
|
||||||
Min *float64 `json:"min"`
|
Min schema.Float `json:"min"`
|
||||||
Max *float64 `json:"max"`
|
Max schema.Float `json:"max"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ApiStatsData struct {
|
type ApiStatsData struct {
|
||||||
@ -46,22 +47,23 @@ type ApiStatsData struct {
|
|||||||
Max schema.Float `json:"max"`
|
Max schema.Float `json:"max"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ccms *CCMetricStore) Init() error {
|
func (ccms *CCMetricStore) Init(url, token string) error {
|
||||||
ccms.url = os.Getenv("CCMETRICSTORE_URL")
|
ccms.url = url
|
||||||
ccms.jwt = os.Getenv("CCMETRICSTORE_JWT")
|
ccms.jwt = token
|
||||||
if ccms.url == "" || ccms.jwt == "" {
|
|
||||||
return errors.New("environment variables 'CCMETRICSTORE_URL' or 'CCMETRICSTORE_JWT' not set")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ccms *CCMetricStore) LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
|
func (ccms *CCMetricStore) doRequest(job *schema.Job, suffix string, metrics []string, ctx context.Context) (*http.Response, error) {
|
||||||
from, to := job.StartTime.Unix(), job.StartTime.Add(time.Duration(job.Duration)*time.Second).Unix()
|
from, to := job.StartTime.Unix(), job.StartTime.Add(time.Duration(job.Duration)*time.Second).Unix()
|
||||||
reqBody := ApiRequestBody{}
|
reqBody := ApiRequestBody{}
|
||||||
reqBody.Metrics = metrics
|
reqBody.Metrics = metrics
|
||||||
for _, node := range job.Nodes {
|
for _, node := range job.Resources {
|
||||||
reqBody.Selectors = append(reqBody.Selectors, []string{job.ClusterID, node})
|
if node.Accelerators != nil || node.HWThreads != nil {
|
||||||
|
// TODO/FIXME:
|
||||||
|
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
|
||||||
|
}
|
||||||
|
|
||||||
|
reqBody.Selectors = append(reqBody.Selectors, []string{job.Cluster, node.Hostname})
|
||||||
}
|
}
|
||||||
|
|
||||||
reqBodyBytes, err := json.Marshal(reqBody)
|
reqBodyBytes, err := json.Marshal(reqBody)
|
||||||
@ -69,53 +71,324 @@ func (ccms *CCMetricStore) LoadData(job *model.Job, metrics []string, ctx contex
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
authHeader := fmt.Sprintf("Bearer %s", ccms.jwt)
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/api/%d/%d/%s", ccms.url, from, to, suffix), bytes.NewReader(reqBodyBytes))
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/api/%d/%d/timeseries?with-stats=true", ccms.url, from, to), bytes.NewReader(reqBodyBytes))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
req.Header.Add("Authorization", authHeader)
|
if ccms.jwt != "" {
|
||||||
|
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
||||||
|
}
|
||||||
|
return ccms.client.Do(req)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ccms *CCMetricStore) LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||||
|
// log.Printf("job: %#v", job)
|
||||||
|
|
||||||
|
type ApiQuery struct {
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Hostname string `json:"hostname"`
|
||||||
|
Type *string `json:"type,omitempty"`
|
||||||
|
TypeIds []string `json:"type-ids,omitempty"`
|
||||||
|
SubType *string `json:"subtype,omitempty"`
|
||||||
|
SubTypeIds []string `json:"subtype-ids,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ApiQueryRequest struct {
|
||||||
|
Cluster string `json:"cluster"`
|
||||||
|
From int64 `json:"from"`
|
||||||
|
To int64 `json:"to"`
|
||||||
|
Queries []ApiQuery `json:"queries"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ApiQueryResponse struct {
|
||||||
|
ApiMetricData
|
||||||
|
Query *ApiQuery `json:"query"`
|
||||||
|
}
|
||||||
|
|
||||||
|
reqBody := ApiQueryRequest{
|
||||||
|
Cluster: job.Cluster,
|
||||||
|
From: job.StartTime.Unix(),
|
||||||
|
To: job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
|
||||||
|
Queries: make([]ApiQuery, 0),
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(scopes) != 1 {
|
||||||
|
return nil, errors.New("todo: support more than one scope in a query")
|
||||||
|
}
|
||||||
|
|
||||||
|
topology := config.GetPartition(job.Cluster, job.Partition).Topology
|
||||||
|
scopeForMetric := map[string]schema.MetricScope{}
|
||||||
|
for _, metric := range metrics {
|
||||||
|
mc := config.GetMetricConfig(job.Cluster, metric)
|
||||||
|
if mc == nil {
|
||||||
|
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||||
|
log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
nativeScope, requestedScope := mc.Scope, scopes[0]
|
||||||
|
|
||||||
|
// case 1: A metric is requested at node scope with a native scope of node as well
|
||||||
|
// case 2: A metric is requested at node scope and node is exclusive
|
||||||
|
// case 3: A metric has native scope node
|
||||||
|
if (nativeScope == requestedScope && nativeScope == schema.MetricScopeNode) ||
|
||||||
|
(job.Exclusive == 1 && requestedScope == schema.MetricScopeNode) ||
|
||||||
|
(nativeScope == schema.MetricScopeNode) {
|
||||||
|
nodes := map[string]bool{}
|
||||||
|
for _, resource := range job.Resources {
|
||||||
|
nodes[resource.Hostname] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
for node := range nodes {
|
||||||
|
reqBody.Queries = append(reqBody.Queries, ApiQuery{
|
||||||
|
Metric: metric,
|
||||||
|
Hostname: node,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
scopeForMetric[metric] = schema.MetricScopeNode
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// case: Read a metric at hwthread scope with native scope hwthread
|
||||||
|
if nativeScope == requestedScope && nativeScope == schema.MetricScopeHWThread && job.NumNodes == 1 {
|
||||||
|
hwthreads := job.Resources[0].HWThreads
|
||||||
|
if hwthreads == nil {
|
||||||
|
hwthreads = topology.Node
|
||||||
|
}
|
||||||
|
|
||||||
|
t := "cpu" // TODO/FIXME: inconsistency between cc-metric-collector and ClusterCockpit
|
||||||
|
for _, hwthread := range hwthreads {
|
||||||
|
reqBody.Queries = append(reqBody.Queries, ApiQuery{
|
||||||
|
Metric: metric,
|
||||||
|
Hostname: job.Resources[0].Hostname,
|
||||||
|
Type: &t,
|
||||||
|
TypeIds: []string{strconv.Itoa(hwthread)},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
scopeForMetric[metric] = schema.MetricScopeHWThread
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// case: A metric is requested at node scope, has a hwthread scope and node is not exclusive and runs on a single node
|
||||||
|
if requestedScope == schema.MetricScopeNode && nativeScope == schema.MetricScopeHWThread && job.Exclusive != 1 && job.NumNodes == 1 {
|
||||||
|
hwthreads := job.Resources[0].HWThreads
|
||||||
|
if hwthreads == nil {
|
||||||
|
hwthreads = topology.Node
|
||||||
|
}
|
||||||
|
|
||||||
|
t := "cpu" // TODO/FIXME: inconsistency between cc-metric-collector and ClusterCockpit
|
||||||
|
ids := make([]string, 0, len(hwthreads))
|
||||||
|
for _, hwthread := range hwthreads {
|
||||||
|
ids = append(ids, strconv.Itoa(hwthread))
|
||||||
|
}
|
||||||
|
|
||||||
|
reqBody.Queries = append(reqBody.Queries, ApiQuery{
|
||||||
|
Metric: metric,
|
||||||
|
Hostname: job.Resources[0].Hostname,
|
||||||
|
Type: &t,
|
||||||
|
TypeIds: ids,
|
||||||
|
})
|
||||||
|
scopeForMetric[metric] = schema.MetricScopeNode
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Job teilt sich knoten und metric native scope ist kleiner als node
|
||||||
|
panic("todo")
|
||||||
|
}
|
||||||
|
|
||||||
|
// log.Printf("query: %#v", reqBody)
|
||||||
|
|
||||||
|
buf := &bytes.Buffer{}
|
||||||
|
if err := json.NewEncoder(buf).Encode(reqBody); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.url+"/api/query", buf)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if ccms.jwt != "" {
|
||||||
|
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
||||||
|
}
|
||||||
res, err := ccms.client.Do(req)
|
res, err := ccms.client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
if res.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("cc-metric-store replied with: %s", res.Status)
|
||||||
|
}
|
||||||
|
|
||||||
resdata := make([]map[string]ApiMetricData, 0, len(reqBody.Selectors))
|
var resBody []ApiQueryResponse
|
||||||
|
if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// log.Printf("response: %#v", resBody)
|
||||||
|
|
||||||
|
var jobData schema.JobData = make(schema.JobData)
|
||||||
|
for _, res := range resBody {
|
||||||
|
|
||||||
|
metric := res.Query.Metric
|
||||||
|
if _, ok := jobData[metric]; !ok {
|
||||||
|
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
|
||||||
|
}
|
||||||
|
|
||||||
|
if res.Error != nil {
|
||||||
|
return nil, fmt.Errorf("cc-metric-store error while fetching %s: %s", metric, *res.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
mc := config.GetMetricConfig(job.Cluster, metric)
|
||||||
|
scope := scopeForMetric[metric]
|
||||||
|
jobMetric, ok := jobData[metric][scope]
|
||||||
|
if !ok {
|
||||||
|
jobMetric = &schema.JobMetric{
|
||||||
|
Unit: mc.Unit,
|
||||||
|
Scope: scope,
|
||||||
|
Timestep: mc.Timestep,
|
||||||
|
Series: make([]schema.Series, 0),
|
||||||
|
}
|
||||||
|
jobData[metric][scope] = jobMetric
|
||||||
|
}
|
||||||
|
|
||||||
|
id := (*int)(nil)
|
||||||
|
if res.Query.Type != nil {
|
||||||
|
id = new(int)
|
||||||
|
*id, _ = strconv.Atoi(res.Query.TypeIds[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
||||||
|
// TODO: use schema.Float instead of float64?
|
||||||
|
// This is done because regular float64 can not be JSONed when NaN.
|
||||||
|
res.Avg = schema.Float(0)
|
||||||
|
res.Min = schema.Float(0)
|
||||||
|
res.Max = schema.Float(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
jobMetric.Series = append(jobMetric.Series, schema.Series{
|
||||||
|
Hostname: res.Query.Hostname,
|
||||||
|
Id: id,
|
||||||
|
Statistics: &schema.MetricStatistics{
|
||||||
|
Avg: float64(res.Avg),
|
||||||
|
Min: float64(res.Min),
|
||||||
|
Max: float64(res.Max),
|
||||||
|
},
|
||||||
|
Data: res.Data,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return jobData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ccms *CCMetricStore) LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
||||||
|
res, err := ccms.doRequest(job, "stats", metrics, ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
resdata := make([]map[string]ApiStatsData, 0, len(job.Resources))
|
||||||
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
|
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var jobData schema.JobData = make(schema.JobData)
|
stats := map[string]map[string]schema.MetricStatistics{}
|
||||||
for _, metric := range metrics {
|
for _, metric := range metrics {
|
||||||
mc := config.GetMetricConfig(job.ClusterID, metric)
|
nodestats := map[string]schema.MetricStatistics{}
|
||||||
metricData := &schema.JobMetric{
|
for i, node := range job.Resources {
|
||||||
Scope: "node", // TODO: FIXME: Whatever...
|
if node.Accelerators != nil || node.HWThreads != nil {
|
||||||
Unit: mc.Unit,
|
// TODO/FIXME:
|
||||||
Timestep: mc.Sampletime,
|
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
|
||||||
Series: make([]*schema.MetricSeries, 0, len(job.Nodes)),
|
|
||||||
}
|
}
|
||||||
for i, node := range job.Nodes {
|
|
||||||
data := resdata[i][metric]
|
data := resdata[i][metric]
|
||||||
if data.Error != nil {
|
if data.Error != nil {
|
||||||
return nil, errors.New(*data.Error)
|
return nil, errors.New(*data.Error)
|
||||||
}
|
}
|
||||||
|
|
||||||
if data.Avg == nil || data.Min == nil || data.Max == nil {
|
if data.Samples == 0 {
|
||||||
return nil, errors.New("no data")
|
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node.Hostname, metric)
|
||||||
}
|
}
|
||||||
|
|
||||||
metricData.Series = append(metricData.Series, &schema.MetricSeries{
|
nodestats[node.Hostname] = schema.MetricStatistics{
|
||||||
NodeID: node,
|
Avg: float64(data.Avg),
|
||||||
Data: data.Data,
|
Min: float64(data.Min),
|
||||||
Statistics: &schema.MetricStatistics{
|
Max: float64(data.Max),
|
||||||
Avg: *data.Avg,
|
|
||||||
Min: *data.Min,
|
|
||||||
Max: *data.Max,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
jobData[metric] = metricData
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return jobData, nil
|
stats[metric] = nodestats
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ccms *CCMetricStore) LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error) {
|
||||||
|
reqBody := ApiRequestBody{}
|
||||||
|
reqBody.Metrics = metrics
|
||||||
|
for _, node := range nodes {
|
||||||
|
reqBody.Selectors = append(reqBody.Selectors, []string{clusterId, node})
|
||||||
|
}
|
||||||
|
|
||||||
|
reqBodyBytes, err := json.Marshal(reqBody)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var req *http.Request
|
||||||
|
if nodes == nil {
|
||||||
|
req, err = http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/api/%s/%d/%d/all-nodes", ccms.url, clusterId, from, to), bytes.NewReader(reqBodyBytes))
|
||||||
|
} else {
|
||||||
|
req, err = http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/api/%d/%d/timeseries", ccms.url, from, to), bytes.NewReader(reqBodyBytes))
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if ccms.jwt != "" {
|
||||||
|
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
||||||
|
}
|
||||||
|
res, err := ccms.client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
data := map[string]map[string][]schema.Float{}
|
||||||
|
if nodes == nil {
|
||||||
|
resdata := map[string]map[string]ApiMetricData{}
|
||||||
|
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for node, metrics := range resdata {
|
||||||
|
nodedata := map[string][]schema.Float{}
|
||||||
|
for metric, data := range metrics {
|
||||||
|
if data.Error != nil {
|
||||||
|
return nil, errors.New(*data.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
nodedata[metric] = data.Data
|
||||||
|
}
|
||||||
|
data[node] = nodedata
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
resdata := make([]map[string]ApiMetricData, 0, len(nodes))
|
||||||
|
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, node := range nodes {
|
||||||
|
metricsData := map[string][]schema.Float{}
|
||||||
|
for metric, data := range resdata[i] {
|
||||||
|
if data.Error != nil {
|
||||||
|
return nil, errors.New(*data.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
metricsData[metric] = data.Data
|
||||||
|
}
|
||||||
|
|
||||||
|
data[node] = metricsData
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return data, nil
|
||||||
}
|
}
|
||||||
|
179
metricdata/influxdb-v2.go
Normal file
179
metricdata/influxdb-v2.go
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
package metricdata
|
||||||
|
|
||||||
|
/*
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
|
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||||
|
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||||
|
)
|
||||||
|
|
||||||
|
type InfluxDBv2DataRepository struct {
|
||||||
|
client influxdb2.Client
|
||||||
|
queryClient influxdb2Api.QueryAPI
|
||||||
|
bucket, measurement string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (idb *InfluxDBv2DataRepository) Init(url string) error {
|
||||||
|
token := os.Getenv("INFLUXDB_V2_TOKEN")
|
||||||
|
if token == "" {
|
||||||
|
log.Println("warning: environment variable 'INFLUXDB_V2_TOKEN' not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
idb.client = influxdb2.NewClient(url, token)
|
||||||
|
idb.queryClient = idb.client.QueryAPI("ClusterCockpit")
|
||||||
|
idb.bucket = "ClusterCockpit/data"
|
||||||
|
idb.measurement = "data"
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (idb *InfluxDBv2DataRepository) formatTime(t time.Time) string {
|
||||||
|
return fmt.Sprintf("%d-%02d-%02dT%02d:%02d:%02dZ",
|
||||||
|
t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
|
||||||
|
fieldsConds := make([]string, 0, len(metrics))
|
||||||
|
for _, m := range metrics {
|
||||||
|
fieldsConds = append(fieldsConds, fmt.Sprintf(`r._field == "%s"`, m))
|
||||||
|
}
|
||||||
|
fieldsCond := strings.Join(fieldsConds, " or ")
|
||||||
|
|
||||||
|
hostsConds := make([]string, 0, len(job.Resources))
|
||||||
|
for _, h := range job.Resources {
|
||||||
|
if h.HWThreads != nil || h.Accelerators != nil {
|
||||||
|
// TODO/FIXME...
|
||||||
|
return nil, errors.New("the InfluxDB metric data repository does not support HWThreads or Accelerators")
|
||||||
|
}
|
||||||
|
|
||||||
|
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h.Hostname))
|
||||||
|
}
|
||||||
|
hostsCond := strings.Join(hostsConds, " or ")
|
||||||
|
|
||||||
|
query := fmt.Sprintf(`from(bucket: "%s")
|
||||||
|
|> range(start: %s, stop: %s)
|
||||||
|
|> filter(fn: (r) => r._measurement == "%s" and (%s) and (%s))
|
||||||
|
|> drop(columns: ["_start", "_stop", "_measurement"])`, idb.bucket,
|
||||||
|
idb.formatTime(job.StartTime), idb.formatTime(job.StartTime.Add(time.Duration(job.Duration)).Add(1*time.Second)),
|
||||||
|
idb.measurement, hostsCond, fieldsCond)
|
||||||
|
rows, err := idb.queryClient.Query(ctx, query)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
jobData := make(schema.JobData)
|
||||||
|
|
||||||
|
var currentSeries *schema.MetricSeries = nil
|
||||||
|
for rows.Next() {
|
||||||
|
row := rows.Record()
|
||||||
|
if currentSeries == nil || rows.TableChanged() {
|
||||||
|
field, host := row.Field(), row.ValueByKey("host").(string)
|
||||||
|
jobMetric, ok := jobData[field]
|
||||||
|
if !ok {
|
||||||
|
mc := config.GetMetricConfig(job.Cluster, field)
|
||||||
|
jobMetric = &schema.JobMetric{
|
||||||
|
Scope: "node", // TODO: FIXME: Whatever...
|
||||||
|
Unit: mc.Unit,
|
||||||
|
Timestep: mc.Timestep,
|
||||||
|
Series: make([]*schema.MetricSeries, 0, len(job.Resources)),
|
||||||
|
}
|
||||||
|
jobData[field] = jobMetric
|
||||||
|
}
|
||||||
|
|
||||||
|
currentSeries = &schema.MetricSeries{
|
||||||
|
Hostname: host,
|
||||||
|
Statistics: nil,
|
||||||
|
Data: make([]schema.Float, 0),
|
||||||
|
}
|
||||||
|
jobMetric.Series = append(jobMetric.Series, currentSeries)
|
||||||
|
}
|
||||||
|
|
||||||
|
val := row.Value().(float64)
|
||||||
|
currentSeries.Data = append(currentSeries.Data, schema.Float(val))
|
||||||
|
}
|
||||||
|
|
||||||
|
stats, err := idb.LoadStats(job, metrics, ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for metric, nodes := range stats {
|
||||||
|
jobMetric := jobData[metric]
|
||||||
|
for node, stats := range nodes {
|
||||||
|
for _, series := range jobMetric.Series {
|
||||||
|
if series.Hostname == node {
|
||||||
|
series.Statistics = &stats
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return jobData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (idb *InfluxDBv2DataRepository) LoadStats(job *model.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
||||||
|
stats := map[string]map[string]schema.MetricStatistics{}
|
||||||
|
|
||||||
|
hostsConds := make([]string, 0, len(job.Resources))
|
||||||
|
for _, h := range job.Resources {
|
||||||
|
if h.HWThreads != nil || h.Accelerators != nil {
|
||||||
|
// TODO/FIXME...
|
||||||
|
return nil, errors.New("the InfluxDB metric data repository does not support HWThreads or Accelerators")
|
||||||
|
}
|
||||||
|
|
||||||
|
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h.Hostname))
|
||||||
|
}
|
||||||
|
hostsCond := strings.Join(hostsConds, " or ")
|
||||||
|
|
||||||
|
for _, metric := range metrics {
|
||||||
|
query := fmt.Sprintf(`
|
||||||
|
data = from(bucket: "%s")
|
||||||
|
|> range(start: %s, stop: %s)
|
||||||
|
|> filter(fn: (r) => r._measurement == "%s" and r._field == "%s" and (%s))
|
||||||
|
|
||||||
|
union(tables: [
|
||||||
|
data |> mean(column: "_value") |> set(key: "_field", value: "avg")
|
||||||
|
data |> min(column: "_value") |> set(key: "_field", value: "min")
|
||||||
|
data |> max(column: "_value") |> set(key: "_field", value: "max")
|
||||||
|
])
|
||||||
|
|> pivot(rowKey: ["host"], columnKey: ["_field"], valueColumn: "_value")
|
||||||
|
|> group()`, idb.bucket,
|
||||||
|
idb.formatTime(job.StartTime), idb.formatTime(job.StartTime.Add(time.Duration(job.Duration)).Add(1*time.Second)),
|
||||||
|
idb.measurement, metric, hostsCond)
|
||||||
|
rows, err := idb.queryClient.Query(ctx, query)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes := map[string]schema.MetricStatistics{}
|
||||||
|
for rows.Next() {
|
||||||
|
row := rows.Record()
|
||||||
|
host := row.ValueByKey("host").(string)
|
||||||
|
avg, min, max := row.ValueByKey("avg").(float64),
|
||||||
|
row.ValueByKey("min").(float64),
|
||||||
|
row.ValueByKey("max").(float64)
|
||||||
|
|
||||||
|
nodes[host] = schema.MetricStatistics{
|
||||||
|
Avg: avg,
|
||||||
|
Min: min,
|
||||||
|
Max: max,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stats[metric] = nodes
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (idb *InfluxDBv2DataRepository) LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
*/
|
@ -2,31 +2,74 @@ package metricdata
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
var runningJobs *CCMetricStore
|
type MetricDataRepository interface {
|
||||||
|
// Initialize this MetricDataRepository. One instance of
|
||||||
|
// this interface will only ever be responsible for one cluster.
|
||||||
|
Init(url, token string) error
|
||||||
|
|
||||||
func init() {
|
// Return the JobData for the given job, only with the requested metrics.
|
||||||
runningJobs = &CCMetricStore{}
|
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
|
||||||
if err := runningJobs.Init(); err != nil {
|
|
||||||
log.Fatalln(err)
|
// Return a map of metrics to a map of nodes to the metric statistics of the job.
|
||||||
|
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
|
||||||
|
|
||||||
|
// Return a map of nodes to a map of metrics to the data for the requested time.
|
||||||
|
LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
|
||||||
|
|
||||||
|
var JobArchivePath string
|
||||||
|
|
||||||
|
var useArchive bool
|
||||||
|
|
||||||
|
func Init(jobArchivePath string, disableArchive bool) error {
|
||||||
|
useArchive = !disableArchive
|
||||||
|
JobArchivePath = jobArchivePath
|
||||||
|
for _, cluster := range config.Clusters {
|
||||||
|
if cluster.MetricDataRepository != nil {
|
||||||
|
switch cluster.MetricDataRepository.Kind {
|
||||||
|
case "cc-metric-store":
|
||||||
|
ccms := &CCMetricStore{}
|
||||||
|
if err := ccms.Init(cluster.MetricDataRepository.Url, cluster.MetricDataRepository.Token); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
metricDataRepos[cluster.Name] = ccms
|
||||||
|
// case "influxdb-v2":
|
||||||
|
// idb := &InfluxDBv2DataRepository{}
|
||||||
|
// if err := idb.Init(cluster.MetricDataRepository.Url); err != nil {
|
||||||
|
// return err
|
||||||
|
// }
|
||||||
|
// metricDataRepos[cluster.Name] = idb
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unkown metric data repository '%s' for cluster '%s'", cluster.MetricDataRepository.Kind, cluster.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetches the metric data for a job.
|
// Fetches the metric data for a job.
|
||||||
func LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
|
func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||||
if job.State == model.JobStateRunning {
|
if job.State == schema.JobStateRunning || !useArchive {
|
||||||
return runningJobs.LoadData(job, metrics, ctx)
|
repo, ok := metricDataRepos[job.Cluster]
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
|
||||||
}
|
}
|
||||||
|
|
||||||
if job.State != model.JobStateCompleted {
|
data, err := repo.LoadData(job, metrics, scopes, ctx)
|
||||||
return nil, fmt.Errorf("job of state '%s' is not supported", job.State)
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
calcStatisticsSeries(job, data)
|
||||||
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := loadFromArchive(job)
|
data, err := loadFromArchive(job)
|
||||||
@ -47,10 +90,58 @@ func LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.Job
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
||||||
func LoadAverages(job *model.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
|
func LoadAverages(job *schema.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
|
||||||
if job.State != model.JobStateCompleted {
|
if job.State != schema.JobStateRunning && useArchive {
|
||||||
return errors.New("only completed jobs are supported")
|
return loadAveragesFromArchive(job, metrics, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
return loadAveragesFromArchive(job, metrics, data)
|
repo, ok := metricDataRepos[job.Cluster]
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
stats, err := repo.LoadStats(job, metrics, ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, m := range metrics {
|
||||||
|
nodes, ok := stats[m]
|
||||||
|
if !ok {
|
||||||
|
data[i] = append(data[i], schema.NaN)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
sum := 0.0
|
||||||
|
for _, node := range nodes {
|
||||||
|
sum += node.Avg
|
||||||
|
}
|
||||||
|
data[i] = append(data[i], schema.Float(sum))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error) {
|
||||||
|
repo, ok := metricDataRepos[clusterId]
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("no metric data repository configured for '%s'", clusterId)
|
||||||
|
}
|
||||||
|
|
||||||
|
if metrics == nil {
|
||||||
|
for _, m := range config.GetClusterConfig(clusterId).MetricConfig {
|
||||||
|
metrics = append(metrics, m.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := repo.LoadNodeData(clusterId, metrics, nodes, from, to, ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if data == nil {
|
||||||
|
return nil, fmt.Errorf("the metric data repository for '%s' does not support this query", clusterId)
|
||||||
|
}
|
||||||
|
|
||||||
|
return data, nil
|
||||||
}
|
}
|
||||||
|
115
rest-api.go
115
rest-api.go
@ -1,115 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"log"
|
|
||||||
"net/http"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
|
||||||
sq "github.com/Masterminds/squirrel"
|
|
||||||
)
|
|
||||||
|
|
||||||
// JSON payloads exchanged with the job start/stop REST handlers.
type (
	// StartJobRequestBody is the request body decoded by startJob.
	StartJobRequestBody struct {
		JobId     string `json:"job_id"`
		UserId    string `json:"user_id"`
		ProjectId string `json:"project_id"`
		ClusterId string `json:"cluster_id"`
		// StartTime is stored as-is in the job.start_time column.
		StartTime int64 `json:"start_time"`
		// Nodes is persisted as a comma separated list; its length becomes num_nodes.
		Nodes    []string `json:"nodes"`
		Metadata string   `json:"metadata"`
	}

	// StartJobResponeBody is the response written by startJob on success.
	// It carries the database id of the freshly inserted job row.
	StartJobResponeBody struct {
		DBID int64 `json:"db_id"`
	}

	// StopJobRequestBody is the request body decoded by stopJob.
	// When DBID is set the job is looked up by its database id; otherwise
	// the combination of JobId, ClusterId and StartTime identifies it.
	StopJobRequestBody struct {
		DBID      *int64 `json:"db_id"`
		JobId     string `json:"job_id"`
		ClusterId string `json:"cluster_id"`
		StartTime int64  `json:"start_time"`

		// StopTime is compared against the unix timestamp of the job's start time.
		StopTime int64 `json:"stop_time"`
	}
)
|
|
||||||
|
|
||||||
func startJob(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
req := StartJobRequestBody{}
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.GetClusterConfig(req.ClusterId) == nil {
|
|
||||||
http.Error(rw, fmt.Sprintf("cluster '%s' does not exist", req.ClusterId), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
res, err := db.Exec(
|
|
||||||
`INSERT INTO job (job_id, user_id, cluster_id, start_time, duration, job_state, num_nodes, node_list, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);`,
|
|
||||||
req.JobId, req.UserId, req.ClusterId, req.StartTime, 0, model.JobStateRunning, len(req.Nodes), strings.Join(req.Nodes, ","), req.Metadata)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
id, err := res.LastInsertId()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Printf("New job started (db-id=%d)\n", id)
|
|
||||||
rw.Header().Add("Content-Type", "application/json")
|
|
||||||
rw.WriteHeader(http.StatusOK)
|
|
||||||
json.NewEncoder(rw).Encode(StartJobResponeBody{
|
|
||||||
DBID: id,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func stopJob(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
req := StopJobRequestBody{}
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
var err error
|
|
||||||
var job *model.Job
|
|
||||||
if req.DBID != nil {
|
|
||||||
job, err = graph.ScanJob(sq.Select(graph.JobTableCols...).From("job").Where("job.id = ?", req.DBID).RunWith(db).QueryRow())
|
|
||||||
} else {
|
|
||||||
job, err = graph.ScanJob(sq.Select(graph.JobTableCols...).From("job").
|
|
||||||
Where("job.job_id = ?", req.JobId).
|
|
||||||
Where("job.cluster_id = ?", req.ClusterId).
|
|
||||||
Where("job.start_time = ?", req.StartTime).
|
|
||||||
RunWith(db).QueryRow())
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != model.JobStateRunning {
|
|
||||||
http.Error(rw, "stop_time must be larger than start_time and only running jobs can be stopped", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
job.Duration = int(job.StartTime.Unix() - req.StopTime)
|
|
||||||
if err := metricdata.ArchiveJob(job, r.Context()); err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, err := db.Exec(`UPDATE job SET job.duration = ?, job.job_state = ? WHERE job.id = ?;`,
|
|
||||||
job.Duration, model.JobStateCompleted, job.ID); err != nil {
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
rw.WriteHeader(http.StatusOK)
|
|
||||||
}
|
|
150
schema/job.go
Normal file
150
schema/job.go
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
package schema
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Common subset of Job and JobMeta. Use one of those, not
|
||||||
|
// this type directly.
|
||||||
|
type BaseJob struct {
|
||||||
|
JobID int64 `json:"jobId" db:"job_id"`
|
||||||
|
User string `json:"user" db:"user"`
|
||||||
|
Project string `json:"project" db:"project"`
|
||||||
|
Cluster string `json:"cluster" db:"cluster"`
|
||||||
|
Partition string `json:"partition" db:"partition"`
|
||||||
|
ArrayJobId int32 `json:"arrayJobId" db:"array_job_id"`
|
||||||
|
NumNodes int32 `json:"numNodes" db:"num_nodes"`
|
||||||
|
NumHWThreads int32 `json:"numHwthreads" db:"num_hwthreads"`
|
||||||
|
NumAcc int32 `json:"numAcc" db:"num_acc"`
|
||||||
|
Exclusive int32 `json:"exclusive" db:"exclusive"`
|
||||||
|
MonitoringStatus int32 `json:"monitoringStatus" db:"monitoring_status"`
|
||||||
|
SMT int32 `json:"smt" db:"smt"`
|
||||||
|
State JobState `json:"jobState" db:"job_state"`
|
||||||
|
Duration int32 `json:"duration" db:"duration"`
|
||||||
|
Tags []*Tag `json:"tags"`
|
||||||
|
RawResources []byte `json:"-" db:"resources"`
|
||||||
|
Resources []*Resource `json:"resources"`
|
||||||
|
MetaData interface{} `json:"metaData" db:"meta_data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// This type is used as the GraphQL interface and using sqlx as a table row.
|
||||||
|
type Job struct {
|
||||||
|
ID int64 `json:"id" db:"id"`
|
||||||
|
BaseJob
|
||||||
|
StartTime time.Time `json:"startTime" db:"start_time"`
|
||||||
|
MemUsedMax float64 `json:"-" db:"mem_used_max"`
|
||||||
|
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"`
|
||||||
|
MemBwAvg float64 `json:"-" db:"mem_bw_avg"`
|
||||||
|
LoadAvg float64 `json:"-" db:"load_avg"`
|
||||||
|
NetBwAvg float64 `json:"-" db:"net_bw_avg"`
|
||||||
|
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"`
|
||||||
|
FileBwAvg float64 `json:"-" db:"file_bw_avg"`
|
||||||
|
FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// When reading from the database or sending data via GraphQL, the start time can be in the much more
|
||||||
|
// convenient time.Time type. In the `meta.json` files, the start time is encoded as a unix epoch timestamp.
|
||||||
|
// This is why there is this struct, which contains all fields from the regular job struct, but "overwrites"
|
||||||
|
// the StartTime field with one of type int64.
|
||||||
|
type JobMeta struct {
|
||||||
|
BaseJob
|
||||||
|
StartTime int64 `json:"startTime" db:"start_time"`
|
||||||
|
Statistics map[string]JobStatistics `json:"statistics,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var JobDefaults BaseJob = BaseJob{
|
||||||
|
Exclusive: 1,
|
||||||
|
MonitoringStatus: 1,
|
||||||
|
MetaData: "",
|
||||||
|
}
|
||||||
|
|
||||||
|
var JobColumns []string = []string{
|
||||||
|
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.start_time", "job.partition", "job.array_job_id", "job.num_nodes",
|
||||||
|
"job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
|
||||||
|
"job.duration", "job.resources", "job.meta_data",
|
||||||
|
}
|
||||||
|
|
||||||
|
type Scannable interface {
|
||||||
|
StructScan(dest interface{}) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper function for scanning jobs with the `jobTableCols` columns selected.
|
||||||
|
func ScanJob(row Scannable) (*Job, error) {
|
||||||
|
job := &Job{BaseJob: JobDefaults}
|
||||||
|
if err := row.StructScan(job); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if job.Duration == 0 && job.State == JobStateRunning {
|
||||||
|
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
||||||
|
}
|
||||||
|
|
||||||
|
job.RawResources = nil
|
||||||
|
return job, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Small value types referenced by the job structs.
type (
	// JobStatistics holds the unit and the average, minimum and maximum of
	// one metric; JobMeta stores one entry per metric name.
	JobStatistics struct {
		Unit string  `json:"unit"`
		Avg  float64 `json:"avg"`
		Min  float64 `json:"min"`
		Max  float64 `json:"max"`
	}

	// Tag is a typed, named label that can be attached to jobs.
	Tag struct {
		ID   int64  `json:"id" db:"id"`
		Type string `json:"type" db:"tag_type"`
		Name string `json:"name" db:"tag_name"`
	}

	// Resource describes one host used by a job. Everything except the
	// hostname is optional and left out of the JSON encoding when empty.
	Resource struct {
		Hostname      string `json:"hostname"`
		HWThreads     []int  `json:"hwthreads,omitempty"`
		Accelerators  []int  `json:"accelerators,omitempty"`
		Configuration string `json:"configuration,omitempty"`
	}
)
|
||||||
|
|
||||||
|
// JobState is the state of a job as stored in the `job_state` column and
// exposed as a GraphQL enum.
type JobState string

// The set of states accepted by JobState.Valid.
const (
	JobStateRunning   JobState = "running"
	JobStateCompleted JobState = "completed"
	JobStateFailed    JobState = "failed"
	JobStateCanceled  JobState = "canceled"
	JobStateStopped   JobState = "stopped"
	JobStateTimeout   JobState = "timeout"
)

// UnmarshalGQL sets the receiver from a GraphQL input value.
//
// It returns an error if v is not a string or does not name a valid state.
// The receiver is only written to on success, so a rejected input never
// leaves an invalid state behind.
func (e *JobState) UnmarshalGQL(v interface{}) error {
	str, ok := v.(string)
	if !ok {
		return fmt.Errorf("enums must be strings")
	}

	// Validate before assigning so that the previous value survives errors.
	state := JobState(str)
	if !state.Valid() {
		return errors.New("invalid job state")
	}

	*e = state
	return nil
}

// MarshalGQL writes the state to w as a double-quoted string. The value is
// escaped while quoting, so that a state containing a quote or backslash
// cannot produce a malformed literal.
func (e JobState) MarshalGQL(w io.Writer) {
	fmt.Fprintf(w, "%q", string(e))
}

// Valid reports whether e is one of the JobState constants declared above.
func (e JobState) Valid() bool {
	switch e {
	case JobStateRunning,
		JobStateCompleted,
		JobStateFailed,
		JobStateCanceled,
		JobStateStopped,
		JobStateTimeout:
		return true
	}
	return false
}
|
@ -5,39 +5,21 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Format of `data.json` files.
|
type JobData map[string]map[MetricScope]*JobMetric
|
||||||
type JobData map[string]*JobMetric
|
|
||||||
|
|
||||||
type JobMetric struct {
|
type JobMetric struct {
|
||||||
Unit string `json:"unit"`
|
Unit string `json:"unit"`
|
||||||
Scope MetricScope `json:"scope"`
|
Scope MetricScope `json:"scope"`
|
||||||
Timestep int `json:"timestep"`
|
Timestep int `json:"timestep"`
|
||||||
Series []*MetricSeries `json:"series"`
|
Series []Series `json:"series"`
|
||||||
|
StatisticsSeries *StatsSeries `json:"statisticsSeries"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricScope string
|
type Series struct {
|
||||||
|
Hostname string `json:"hostname"`
|
||||||
const (
|
Id *int `json:"id,omitempty"`
|
||||||
MetricScopeNode MetricScope = "node"
|
Statistics *MetricStatistics `json:"statistics"`
|
||||||
MetricScopeSocket MetricScope = "socket"
|
Data []Float `json:"data"`
|
||||||
MetricScopeCpu MetricScope = "cpu"
|
|
||||||
)
|
|
||||||
|
|
||||||
func (e *MetricScope) UnmarshalGQL(v interface{}) error {
|
|
||||||
str, ok := v.(string)
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("enums must be strings")
|
|
||||||
}
|
|
||||||
|
|
||||||
*e = MetricScope(str)
|
|
||||||
if *e != "node" && *e != "socket" && *e != "cpu" {
|
|
||||||
return fmt.Errorf("%s is not a valid MetricScope", str)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e MetricScope) MarshalGQL(w io.Writer) {
|
|
||||||
fmt.Fprintf(w, "\"%s\"", e)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricStatistics struct {
|
type MetricStatistics struct {
|
||||||
@ -46,33 +28,51 @@ type MetricStatistics struct {
|
|||||||
Max float64 `json:"max"`
|
Max float64 `json:"max"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricSeries struct {
|
type StatsSeries struct {
|
||||||
NodeID string `json:"node_id"`
|
Mean []Float `json:"mean"`
|
||||||
Statistics *MetricStatistics `json:"statistics"`
|
Min []Float `json:"min"`
|
||||||
Data []Float `json:"data"`
|
Max []Float `json:"max"`
|
||||||
|
Percentiles map[int][]Float `json:"percentiles,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobMetaStatistics struct {
|
type MetricScope string
|
||||||
Unit string `json:"unit"`
|
|
||||||
Avg float64 `json:"avg"`
|
const (
|
||||||
Min float64 `json:"min"`
|
MetricScopeNode MetricScope = "node"
|
||||||
Max float64 `json:"max"`
|
MetricScopeSocket MetricScope = "socket"
|
||||||
|
MetricScopeCpu MetricScope = "cpu"
|
||||||
|
MetricScopeHWThread MetricScope = "hwthread"
|
||||||
|
)
|
||||||
|
|
||||||
|
var metricScopeGranularity map[MetricScope]int = map[MetricScope]int{
|
||||||
|
MetricScopeNode: 1,
|
||||||
|
MetricScopeSocket: 2,
|
||||||
|
MetricScopeCpu: 3,
|
||||||
|
MetricScopeHWThread: 4,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Format of `meta.json` files.
|
func (e *MetricScope) MaxGranularity(other MetricScope) MetricScope {
|
||||||
type JobMeta struct {
|
a := metricScopeGranularity[*e]
|
||||||
JobId string `json:"job_id"`
|
b := metricScopeGranularity[other]
|
||||||
UserId string `json:"user_id"`
|
if a < b {
|
||||||
ProjectId string `json:"project_id"`
|
return *e
|
||||||
ClusterId string `json:"cluster_id"`
|
}
|
||||||
NumNodes int `json:"num_nodes"`
|
return other
|
||||||
JobState string `json:"job_state"`
|
}
|
||||||
StartTime int64 `json:"start_time"`
|
|
||||||
Duration int64 `json:"duration"`
|
func (e *MetricScope) UnmarshalGQL(v interface{}) error {
|
||||||
Nodes []string `json:"nodes"`
|
str, ok := v.(string)
|
||||||
Tags []struct {
|
if !ok {
|
||||||
Name string `json:"name"`
|
return fmt.Errorf("enums must be strings")
|
||||||
Type string `json:"type"`
|
}
|
||||||
} `json:"tags"`
|
|
||||||
Statistics map[string]*JobMetaStatistics `json:"statistics"`
|
*e = MetricScope(str)
|
||||||
|
if _, ok := metricScopeGranularity[*e]; !ok {
|
||||||
|
return fmt.Errorf("%s is not a valid MetricScope", str)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e MetricScope) MarshalGQL(w io.Writer) {
|
||||||
|
fmt.Fprintf(w, "\"%s\"", e)
|
||||||
}
|
}
|
||||||
|
433
server.go
433
server.go
@ -3,19 +3,23 @@ package main
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"flag"
|
"flag"
|
||||||
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"strconv"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/99designs/gqlgen/graphql/handler"
|
"github.com/99designs/gqlgen/graphql/handler"
|
||||||
"github.com/99designs/gqlgen/graphql/playground"
|
"github.com/99designs/gqlgen/graphql/playground"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/api"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/generated"
|
"github.com/ClusterCockpit/cc-jobarchive/graph/generated"
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
|
||||||
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||||
|
"github.com/ClusterCockpit/cc-jobarchive/templates"
|
||||||
"github.com/gorilla/handlers"
|
"github.com/gorilla/handlers"
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
"github.com/jmoiron/sqlx"
|
"github.com/jmoiron/sqlx"
|
||||||
@ -24,86 +28,423 @@ import (
|
|||||||
|
|
||||||
var db *sqlx.DB
|
var db *sqlx.DB
|
||||||
|
|
||||||
func main() {
|
// Format of the configuration (file). See below for the defaults.
|
||||||
var reinitDB bool
|
type ProgramConfig struct {
|
||||||
var port, staticFiles, jobDBFile string
|
// Address where the http (or https) server will listen on (for example: 'localhost:80').
|
||||||
|
Addr string `json:"addr"`
|
||||||
|
|
||||||
flag.StringVar(&port, "port", "8080", "Port on which to listen")
|
// Disable authentication (for everything: API, Web-UI, ...)
|
||||||
flag.StringVar(&staticFiles, "static-files", "./frontend/public", "Directory who's contents shall be served as static files")
|
DisableAuthentication bool `json:"disable-authentication"`
|
||||||
flag.StringVar(&jobDBFile, "job-db", "./var/job.db", "SQLite 3 Jobs Database File")
|
|
||||||
flag.BoolVar(&reinitDB, "init-db", false, "Initialize new SQLite Database")
|
// Folder where static assets can be found, will be served directly
|
||||||
|
StaticFiles string `json:"static-files"`
|
||||||
|
|
||||||
|
// Currently only SQLite3 is supported, so this should be a filename.
|
||||||
|
DB string `json:"db"`
|
||||||
|
|
||||||
|
// Path to the job-archive
|
||||||
|
JobArchive string `json:"job-archive"`
|
||||||
|
|
||||||
|
// Make the /api/jobs/stop_job endpoint do the heavy work in the background.
|
||||||
|
AsyncArchiving bool `json:"async-archive"`
|
||||||
|
|
||||||
|
// Keep all metric data in the metric data repositories,
|
||||||
|
// do not write to the job-archive.
|
||||||
|
DisableArchive bool `json:"disable-archive"`
|
||||||
|
|
||||||
|
// For LDAP authentication and user synchronisation.
|
||||||
|
LdapConfig *auth.LdapConfig `json:"ldap"`
|
||||||
|
|
||||||
|
// If both those options are not empty, use HTTPS using those certificates.
|
||||||
|
HttpsCertFile string `json:"https-cert-file"`
|
||||||
|
HttpsKeyFile string `json:"https-key-file"`
|
||||||
|
|
||||||
|
// If overwritten, at least all the options in the defaults below must
|
||||||
|
// be provided! Most options here can be overwritten by the user.
|
||||||
|
UiDefaults map[string]interface{} `json:"ui-defaults"`
|
||||||
|
|
||||||
|
// Where to store MachineState files
|
||||||
|
MachineStateDir string `json:"machine-state-dir"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var programConfig ProgramConfig = ProgramConfig{
|
||||||
|
Addr: "0.0.0.0:8080",
|
||||||
|
DisableAuthentication: false,
|
||||||
|
StaticFiles: "./frontend/public",
|
||||||
|
DB: "./var/job.db",
|
||||||
|
JobArchive: "./var/job-archive",
|
||||||
|
AsyncArchiving: true,
|
||||||
|
DisableArchive: false,
|
||||||
|
LdapConfig: &auth.LdapConfig{
|
||||||
|
Url: "ldap://localhost",
|
||||||
|
UserBase: "ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
|
||||||
|
SearchDN: "cn=admin,dc=rrze,dc=uni-erlangen,dc=de",
|
||||||
|
UserBind: "uid={username},ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
|
||||||
|
UserFilter: "(&(objectclass=posixAccount)(uid=*))",
|
||||||
|
},
|
||||||
|
HttpsCertFile: "",
|
||||||
|
HttpsKeyFile: "",
|
||||||
|
UiDefaults: map[string]interface{}{
|
||||||
|
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||||
|
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
|
||||||
|
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||||
|
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"},
|
||||||
|
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||||
|
"plot_general_colorBackground": true,
|
||||||
|
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
|
||||||
|
"plot_general_lineWidth": 1,
|
||||||
|
"plot_list_jobsPerPage": 10,
|
||||||
|
"plot_list_selectedMetrics": []string{"cpu_load", "mem_used", "flops_any", "mem_bw", "clock"},
|
||||||
|
"plot_view_plotsPerRow": 4,
|
||||||
|
"plot_view_showPolarplot": true,
|
||||||
|
"plot_view_showRoofline": true,
|
||||||
|
"plot_view_showStatTable": true,
|
||||||
|
},
|
||||||
|
MachineStateDir: "./var/machine-state",
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var flagReinitDB, flagStopImmediately, flagSyncLDAP bool
|
||||||
|
var flagConfigFile string
|
||||||
|
var flagNewUser, flagDelUser, flagGenJWT string
|
||||||
|
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize `job`, `tag`, and `jobtag` tables")
|
||||||
|
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the `user` table with ldap")
|
||||||
|
flag.BoolVar(&flagStopImmediately, "no-server", false, "Do not start a server, stop right after initialization and argument handling")
|
||||||
|
flag.StringVar(&flagConfigFile, "config", "", "Location of the config file for this server (overwrites the defaults)")
|
||||||
|
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin|api]:<password>`")
|
||||||
|
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by username")
|
||||||
|
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by the username")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
if flagConfigFile != "" {
|
||||||
|
data, err := os.ReadFile(flagConfigFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(data, &programConfig); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
db, err = sqlx.Open("sqlite3", jobDBFile)
|
// This might need to change for other databases:
|
||||||
|
db, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", programConfig.DB))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// See https://github.com/mattn/go-sqlite3/issues/274
|
// Only for sqlite, not needed for any other database:
|
||||||
db.SetMaxOpenConns(1)
|
db.SetMaxOpenConns(1)
|
||||||
defer db.Close()
|
|
||||||
|
|
||||||
if reinitDB {
|
// Initialize sub-modules...
|
||||||
if err = initDB(db, metricdata.JobArchivePath); err != nil {
|
|
||||||
|
if !programConfig.DisableAuthentication {
|
||||||
|
if err := auth.Init(db, programConfig.LdapConfig); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if flagNewUser != "" {
|
||||||
|
if err := auth.AddUserToDB(db, flagNewUser); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if flagDelUser != "" {
|
||||||
|
if err := auth.DelUserFromDB(db, flagDelUser); err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
config.Clusters, err = loadClusters()
|
if flagSyncLDAP {
|
||||||
|
auth.SyncWithLDAP(db)
|
||||||
|
}
|
||||||
|
|
||||||
|
if flagGenJWT != "" {
|
||||||
|
user, err := auth.FetchUserFromDB(db, flagGenJWT)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !user.IsAPIUser {
|
||||||
|
log.Println("warning: that user does not have the API role")
|
||||||
|
}
|
||||||
|
|
||||||
|
jwt, err := auth.ProvideJWT(user)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("JWT for '%s': %s\n", user.Username, jwt)
|
||||||
|
}
|
||||||
|
} else if flagNewUser != "" || flagDelUser != "" {
|
||||||
|
log.Fatalln("arguments --add-user and --del-user can only be used if authentication is enabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := config.Init(db, !programConfig.DisableAuthentication, programConfig.UiDefaults, programConfig.JobArchive); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := metricdata.Init(programConfig.JobArchive, programConfig.DisableArchive); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if flagReinitDB {
|
||||||
|
if err := initDB(db, programConfig.JobArchive); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if flagStopImmediately {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build routes...
|
||||||
|
|
||||||
|
resolver := &graph.Resolver{DB: db}
|
||||||
|
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||||
|
|
||||||
|
// graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
||||||
|
// switch e := err.(type) {
|
||||||
|
// case string:
|
||||||
|
// return fmt.Errorf("panic: %s", e)
|
||||||
|
// case error:
|
||||||
|
// return fmt.Errorf("panic caused by: %w", e)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// return errors.New("internal server error (panic)")
|
||||||
|
// })
|
||||||
|
|
||||||
|
graphQLPlayground := playground.Handler("GraphQL playground", "/query")
|
||||||
|
api := &api.RestApi{
|
||||||
|
DB: db,
|
||||||
|
AsyncArchiving: programConfig.AsyncArchiving,
|
||||||
|
Resolver: resolver,
|
||||||
|
MachineStateDir: programConfig.MachineStateDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
handleGetLogin := func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
templates.Render(rw, r, "login.html", &templates.Page{
|
||||||
|
Title: "Login",
|
||||||
|
Login: &templates.LoginPage{},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
r := mux.NewRouter()
|
r := mux.NewRouter()
|
||||||
loggedRouter := handlers.LoggingHandler(os.Stdout, r)
|
r.NotFoundHandler = http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
templates.Render(rw, r, "404.html", &templates.Page{
|
||||||
|
Title: "Not found",
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
srv := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{
|
r.Handle("/playground", graphQLPlayground)
|
||||||
Resolvers: &graph.Resolver{DB: db}}))
|
r.Handle("/login", auth.Login(db)).Methods(http.MethodPost)
|
||||||
r.HandleFunc("/graphql-playground", playground.Handler("GraphQL playground", "/query"))
|
r.HandleFunc("/login", handleGetLogin).Methods(http.MethodGet)
|
||||||
r.Handle("/query", srv)
|
r.HandleFunc("/logout", auth.Logout).Methods(http.MethodPost)
|
||||||
|
|
||||||
r.HandleFunc("/config.json", config.ServeConfig).Methods("GET")
|
secured := r.PathPrefix("/").Subrouter()
|
||||||
|
if !programConfig.DisableAuthentication {
|
||||||
|
secured.Use(auth.Auth)
|
||||||
|
}
|
||||||
|
secured.Handle("/query", graphQLEndpoint)
|
||||||
|
|
||||||
r.HandleFunc("/api/start-job", startJob).Methods("POST")
|
secured.HandleFunc("/", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
r.HandleFunc("/api/stop-job", stopJob).Methods("POST")
|
conf, err := config.GetUIConfig(r)
|
||||||
|
if err != nil {
|
||||||
if len(staticFiles) != 0 {
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
r.PathPrefix("/").Handler(http.FileServer(http.Dir(staticFiles)))
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("GraphQL playground: http://localhost:%s/graphql-playground", port)
|
infos := map[string]interface{}{
|
||||||
log.Printf("Home: http://localhost:%s/index.html", port)
|
"clusters": config.Clusters,
|
||||||
log.Fatal(http.ListenAndServe("127.0.0.1:"+port,
|
"username": "",
|
||||||
handlers.CORS(handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization"}),
|
"admin": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
if user := auth.GetUser(r.Context()); user != nil {
|
||||||
|
infos["username"] = user.Username
|
||||||
|
infos["admin"] = user.IsAdmin
|
||||||
|
}
|
||||||
|
|
||||||
|
templates.Render(rw, r, "home.html", &templates.Page{
|
||||||
|
Title: "ClusterCockpit",
|
||||||
|
Config: conf,
|
||||||
|
Infos: infos,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
monitoringRoutes(secured, resolver)
|
||||||
|
api.MountRoutes(secured)
|
||||||
|
|
||||||
|
r.PathPrefix("/").Handler(http.FileServer(http.Dir(programConfig.StaticFiles)))
|
||||||
|
handler := handlers.CORS(
|
||||||
|
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization"}),
|
||||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||||
handlers.AllowedOrigins([]string{"*"}))(loggedRouter)))
|
handlers.AllowedOrigins([]string{"*"}))(handlers.LoggingHandler(os.Stdout, handlers.CompressHandler(r)))
|
||||||
|
|
||||||
|
// Start http or https server
|
||||||
|
if programConfig.HttpsCertFile != "" && programConfig.HttpsKeyFile != "" {
|
||||||
|
log.Printf("HTTPS server running at %s...", programConfig.Addr)
|
||||||
|
err = http.ListenAndServeTLS(programConfig.Addr, programConfig.HttpsCertFile, programConfig.HttpsKeyFile, handler)
|
||||||
|
} else {
|
||||||
|
log.Printf("HTTP server running at %s...", programConfig.Addr)
|
||||||
|
err = http.ListenAndServe(programConfig.Addr, handler)
|
||||||
|
}
|
||||||
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadClusters() ([]*model.Cluster, error) {
|
func monitoringRoutes(router *mux.Router, resolver *graph.Resolver) {
|
||||||
entries, err := os.ReadDir(metricdata.JobArchivePath)
|
buildFilterPresets := func(query url.Values) map[string]interface{} {
|
||||||
|
filterPresets := map[string]interface{}{}
|
||||||
|
|
||||||
|
if query.Get("cluster") != "" {
|
||||||
|
filterPresets["cluster"] = query.Get("cluster")
|
||||||
|
}
|
||||||
|
if query.Get("project") != "" {
|
||||||
|
filterPresets["project"] = query.Get("project")
|
||||||
|
}
|
||||||
|
if query.Get("state") != "" && schema.JobState(query.Get("state")).Valid() {
|
||||||
|
filterPresets["state"] = query.Get("state")
|
||||||
|
}
|
||||||
|
if rawtags, ok := query["tag"]; ok {
|
||||||
|
tags := make([]int, len(rawtags))
|
||||||
|
for i, tid := range rawtags {
|
||||||
|
var err error
|
||||||
|
tags[i], err = strconv.Atoi(tid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
tags[i] = -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
filterPresets["tags"] = tags
|
||||||
}
|
}
|
||||||
|
|
||||||
clusters := []*model.Cluster{}
|
return filterPresets
|
||||||
for _, de := range entries {
|
}
|
||||||
bytes, err := os.ReadFile(filepath.Join(metricdata.JobArchivePath, de.Name(), "cluster.json"))
|
|
||||||
|
router.HandleFunc("/monitoring/jobs/", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
conf, err := config.GetUIConfig(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var cluster model.Cluster
|
templates.Render(rw, r, "monitoring/jobs.html", &templates.Page{
|
||||||
if err := json.Unmarshal(bytes, &cluster); err != nil {
|
Title: "Jobs - ClusterCockpit",
|
||||||
return nil, err
|
Config: conf,
|
||||||
|
FilterPresets: buildFilterPresets(r.URL.Query()),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
router.HandleFunc("/monitoring/job/{id:[0-9]+}", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
conf, err := config.GetUIConfig(r)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if cluster.FilterRanges.StartTime.To.IsZero() {
|
id := mux.Vars(r)["id"]
|
||||||
cluster.FilterRanges.StartTime.To = time.Unix(0, 0)
|
job, err := resolver.Query().Job(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
clusters = append(clusters, &cluster)
|
templates.Render(rw, r, "monitoring/job.html", &templates.Page{
|
||||||
|
Title: fmt.Sprintf("Job %d - ClusterCockpit", job.JobID),
|
||||||
|
Config: conf,
|
||||||
|
Infos: map[string]interface{}{
|
||||||
|
"id": id,
|
||||||
|
"jobId": job.JobID,
|
||||||
|
"clusterId": job.Cluster,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
router.HandleFunc("/monitoring/users/", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
conf, err := config.GetUIConfig(r)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
return clusters, nil
|
templates.Render(rw, r, "monitoring/users.html", &templates.Page{
|
||||||
|
Title: "Users - ClusterCockpit",
|
||||||
|
Config: conf,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
router.HandleFunc("/monitoring/user/{id}", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
conf, err := config.GetUIConfig(r)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
id := mux.Vars(r)["id"]
|
||||||
|
// TODO: One could check if the user exists, but that would be unhelpfull if authentication
|
||||||
|
// is disabled or the user does not exist but has started jobs.
|
||||||
|
|
||||||
|
templates.Render(rw, r, "monitoring/user.html", &templates.Page{
|
||||||
|
Title: fmt.Sprintf("User %s - ClusterCockpit", id),
|
||||||
|
Config: conf,
|
||||||
|
Infos: map[string]interface{}{"username": id},
|
||||||
|
FilterPresets: buildFilterPresets(r.URL.Query()),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
router.HandleFunc("/monitoring/analysis/", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
conf, err := config.GetUIConfig(r)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
filterPresets := map[string]interface{}{}
|
||||||
|
query := r.URL.Query()
|
||||||
|
if query.Get("cluster") != "" {
|
||||||
|
filterPresets["clusterId"] = query.Get("cluster")
|
||||||
|
}
|
||||||
|
|
||||||
|
templates.Render(rw, r, "monitoring/analysis.html", &templates.Page{
|
||||||
|
Title: "Analysis View - ClusterCockpit",
|
||||||
|
Config: conf,
|
||||||
|
FilterPresets: filterPresets,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
router.HandleFunc("/monitoring/systems/", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
conf, err := config.GetUIConfig(r)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
filterPresets := map[string]interface{}{}
|
||||||
|
query := r.URL.Query()
|
||||||
|
if query.Get("cluster") != "" {
|
||||||
|
filterPresets["clusterId"] = query.Get("cluster")
|
||||||
|
}
|
||||||
|
|
||||||
|
templates.Render(rw, r, "monitoring/systems.html", &templates.Page{
|
||||||
|
Title: "System View - ClusterCockpit",
|
||||||
|
Config: conf,
|
||||||
|
FilterPresets: filterPresets,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
router.HandleFunc("/monitoring/node/{clusterId}/{nodeId}", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
conf, err := config.GetUIConfig(r)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
vars := mux.Vars(r)
|
||||||
|
templates.Render(rw, r, "monitoring/node.html", &templates.Page{
|
||||||
|
Title: fmt.Sprintf("Node %s - ClusterCockpit", vars["nodeId"]),
|
||||||
|
Config: conf,
|
||||||
|
Infos: map[string]interface{}{
|
||||||
|
"nodeId": vars["nodeId"],
|
||||||
|
"clusterId": vars["clusterId"],
|
||||||
|
},
|
||||||
|
})
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
10
templates/404.html
Normal file
10
templates/404.html
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
{{/* 404 page: fills the "content" block of base.html with a not-found alert. */}}
{{template "base.html" .}}
{{define "content"}}
    <div class="row">
        <div class="col">
            {{/* Bootstrap 5 has no "alert-error" variant; "alert-danger" is the error style. */}}
            <div class="alert alert-danger" role="alert">
                404: Not found
            </div>
        </div>
    </div>
{{end}}
|
28
templates/base.html
Normal file
28
templates/base.html
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset='utf-8'>
|
||||||
|
<meta name='viewport' content='width=device-width,initial-scale=1'>
|
||||||
|
<title>{{.Title}}</title>
|
||||||
|
|
||||||
|
<link rel='icon' type='image/png' href='/favicon.png'>
|
||||||
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.1/dist/css/bootstrap.min.css" integrity="sha384-F3w7mX95PdgyTmZZMECAngseQB83DfGTowi0iMjiWaeVhAn4FJkqJByhZMI3AhiU" crossorigin="anonymous">
|
||||||
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.4.1/font/bootstrap-icons.css">
|
||||||
|
<link rel='stylesheet' href='/global.css'>
|
||||||
|
<link rel='stylesheet' href='/uPlot.min.css'>
|
||||||
|
|
||||||
|
{{block "stylesheets" .}}{{end}}
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="container">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
{{block "content" .}}
|
||||||
|
Whoops, you should not see this...
|
||||||
|
{{end}}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{{block "javascript" .}}{{end}}
|
||||||
|
</body>
|
||||||
|
</html>
|
57
templates/home.html
Normal file
57
templates/home.html
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
{{define "content"}}
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
{{if .Infos.username}}
|
||||||
|
<i class="bi bi-person-circle"></i> {{ .Infos.username }}
|
||||||
|
{{if .Infos.admin}}
|
||||||
|
<span class="badge bg-primary">Admin</span>
|
||||||
|
{{end}}
|
||||||
|
{{end}}
|
||||||
|
</div>
|
||||||
|
<div class="col" style="text-align: right;">
|
||||||
|
<form method="post" action="/logout">
|
||||||
|
<button type="submit" class="btn btn-primary">Logout</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="row">
|
||||||
|
{{if .Infos.admin}}
|
||||||
|
<div class="col-4">
|
||||||
|
<ul>
|
||||||
|
<li><a href="/monitoring/jobs/">All jobs</a></li>
|
||||||
|
<li><a href="/monitoring/users/">All users</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
{{else}}
|
||||||
|
<div class="col-4">
|
||||||
|
<ul>
|
||||||
|
<li><a href="/monitoring/jobs/">My jobs</a></li>
|
||||||
|
<li><a href="/monitoring/user/{{.Infos.username}}">My user view</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
{{end}}
|
||||||
|
<div class="col-8">
|
||||||
|
<h2>Clusters</h2>
|
||||||
|
<table class="table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Jobs</th>
|
||||||
|
<th>System View</th>
|
||||||
|
<th>Analysis View</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{{range .Infos.clusters}}
|
||||||
|
<tr>
|
||||||
|
<td>{{.Name}}</td>
|
||||||
|
<td><a href="/monitoring/jobs/?cluster={{.Name}}">Jobs</a></td>
|
||||||
|
<td><a href="/monitoring/systems/?cluster={{.Name}}">System View</a></td>
|
||||||
|
<td><a href="/monitoring/analysis/?cluster={{.Name}}">Analysis View</a></td>
|
||||||
|
</tr>
|
||||||
|
{{end}}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{{end}}
|
47
templates/login.html
Normal file
47
templates/login.html
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
{{define "content"}}
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
<h1>
|
||||||
|
ClusterCockpit Login
|
||||||
|
</h1>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
{{if .Login.Error}}
|
||||||
|
<div class="alert alert-warning" role="alert">
|
||||||
|
{{.Login.Error}}
|
||||||
|
</div>
|
||||||
|
{{end}}
|
||||||
|
|
||||||
|
{{if .Login.Info}}
|
||||||
|
<div class="alert alert-success" role="alert">
|
||||||
|
{{.Login.Info}}
|
||||||
|
</div>
|
||||||
|
{{end}}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
<form method="post" action="/login">
|
||||||
|
<div class="mb-3">
|
||||||
|
<label class="form-label" for="username">Username</label>
|
||||||
|
<input class="form-control" type="text" id="username" name="username">
|
||||||
|
</div>
|
||||||
|
<div class="mb-3">
|
||||||
|
<label class="form-label" for="password">Password</label>
|
||||||
|
<input class="form-control" type="password" id="password" name="password">
|
||||||
|
</div>
|
||||||
|
<button type="submit" class="btn btn-primary">Login</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<br/>
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
<form method="post" action="/logout">
|
||||||
|
<button type="submit" class="btn btn-primary">Logout</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{{end}}
|
18
templates/monitoring/analysis.html
Normal file
18
templates/monitoring/analysis.html
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
{{define "content"}}
|
||||||
|
<div id="svelte-app"></div>
|
||||||
|
{{end}}
|
||||||
|
|
||||||
|
{{define "stylesheets"}}
|
||||||
|
<link rel='stylesheet' href='/build/analysis.css'>
|
||||||
|
{{end}}
|
||||||
|
{{define "javascript"}}
|
||||||
|
<script>
|
||||||
|
const filterPresets = {{ .FilterPresets }};
|
||||||
|
const clusterCockpitConfigPromise = Promise.resolve({
|
||||||
|
plot_view_plotsPerRow: {{ .Config.plot_view_plotsPerRow }},
|
||||||
|
analysis_view_histogramMetrics: {{ .Config.analysis_view_histogramMetrics }},
|
||||||
|
analysis_view_scatterPlotMetrics: {{ .Config.analysis_view_scatterPlotMetrics }}
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
<script src='/build/analysis.js'></script>
|
||||||
|
{{end}}
|
29
templates/monitoring/job.html
Normal file
29
templates/monitoring/job.html
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
{{define "content"}}
|
||||||
|
<div id="svelte-app"></div>
|
||||||
|
{{end}}
|
||||||
|
|
||||||
|
{{define "stylesheets"}}
|
||||||
|
<link rel='stylesheet' href='/build/job.css'>
|
||||||
|
{{end}}
|
||||||
|
{{/* Inline bootstrap data for the job view: identifies the job and passes the UI config values the bundle reads. */}}
{{define "javascript"}}
    <script>
        const jobInfos = {
            id: "{{ .Infos.id }}",
            jobId: "{{ .Infos.jobId }}",
            clusterId: "{{ .Infos.clusterId }}"
        };
        const clusterCockpitConfigPromise = Promise.resolve({
            plot_general_colorscheme: {{ .Config.plot_general_colorscheme }},
            plot_general_lineWidth: {{ .Config.plot_general_lineWidth }},
            plot_general_colorBackground: {{ .Config.plot_general_colorBackground }},
            plot_view_showRoofline: {{ .Config.plot_view_showRoofline }},
            plot_view_showPolarplot: {{ .Config.plot_view_showPolarplot }},
            plot_view_showStatTable: {{ .Config.plot_view_showStatTable }},
            plot_view_plotsPerRow: {{ .Config.plot_view_plotsPerRow }},
            job_view_selectedMetrics: {{ .Config.job_view_selectedMetrics }},
            job_view_nodestats_selectedMetrics: {{ .Config.job_view_nodestats_selectedMetrics }},
            {{/* The config key is job_view_polarPlotMetrics; a missing map key would render as null here. */}}
            job_view_polarPlotMetrics: {{ .Config.job_view_polarPlotMetrics }},
        });
    </script>
    <script src='/build/job.js'></script>
{{end}}
|
14
templates/monitoring/jobs.html
Normal file
14
templates/monitoring/jobs.html
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
{{define "content"}}
|
||||||
|
<div id="svelte-app"></div>
|
||||||
|
{{end}}
|
||||||
|
|
||||||
|
{{define "stylesheets"}}
|
||||||
|
<link rel='stylesheet' href='/build/jobs.css'>
|
||||||
|
{{end}}
|
||||||
|
{{define "javascript"}}
|
||||||
|
<script>
|
||||||
|
const filterPresets = {{ .FilterPresets }};
|
||||||
|
const clusterCockpitConfig = {{ .Config }};
|
||||||
|
</script>
|
||||||
|
<script src='/build/jobs.js'></script>
|
||||||
|
{{end}}
|
21
templates/monitoring/node.html
Normal file
21
templates/monitoring/node.html
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
{{define "content"}}
|
||||||
|
<div id="svelte-app"></div>
|
||||||
|
{{end}}
|
||||||
|
|
||||||
|
{{define "stylesheets"}}
|
||||||
|
<link rel='stylesheet' href='/build/node.css'>
|
||||||
|
{{end}}
|
||||||
|
{{define "javascript"}}
|
||||||
|
<script>
|
||||||
|
const nodeInfos = {
|
||||||
|
nodeId: "{{ .Infos.nodeId }}",
|
||||||
|
clusterId: "{{ .Infos.clusterId }}"
|
||||||
|
};
|
||||||
|
const clusterCockpitConfigPromise = Promise.resolve({
|
||||||
|
plot_general_colorscheme: {{ .Config.plot_general_colorscheme }},
|
||||||
|
plot_general_lineWidth: {{ .Config.plot_general_lineWidth }},
|
||||||
|
plot_general_colorBackground: {{ .Config.plot_general_colorBackground }},
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
<script src='/build/node.js'></script>
|
||||||
|
{{end}}
|
19
templates/monitoring/systems.html
Normal file
19
templates/monitoring/systems.html
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
{{define "content"}}
|
||||||
|
<div id="svelte-app"></div>
|
||||||
|
{{end}}
|
||||||
|
|
||||||
|
{{define "stylesheets"}}
|
||||||
|
<link rel='stylesheet' href='/build/systems.css'>
|
||||||
|
{{end}}
|
||||||
|
{{define "javascript"}}
|
||||||
|
<script>
|
||||||
|
const filterPresets = {{ .FilterPresets }};
|
||||||
|
const clusterCockpitConfigPromise = Promise.resolve({
|
||||||
|
plot_view_plotsPerRow: {{ .Config.plot_view_plotsPerRow }},
|
||||||
|
plot_general_colorscheme: {{ .Config.plot_general_colorscheme }},
|
||||||
|
plot_general_lineWidth: {{ .Config.plot_general_lineWidth }},
|
||||||
|
plot_general_colorBackground: {{ .Config.plot_general_colorBackground }},
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
<script src='/build/systems.js'></script>
|
||||||
|
{{end}}
|
15
templates/monitoring/user.html
Normal file
15
templates/monitoring/user.html
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
{{define "content"}}
|
||||||
|
<div id="svelte-app"></div>
|
||||||
|
{{end}}
|
||||||
|
|
||||||
|
{{define "stylesheets"}}
|
||||||
|
<link rel='stylesheet' href='/build/user.css'>
|
||||||
|
{{end}}
|
||||||
|
{{define "javascript"}}
|
||||||
|
<script>
|
||||||
|
const userInfos = {{ .Infos }};
|
||||||
|
const filterPresets = {{ .FilterPresets }};
|
||||||
|
const clusterCockpitConfig = {{ .Config }};
|
||||||
|
</script>
|
||||||
|
<script src='/build/user.js'></script>
|
||||||
|
{{end}}
|
14
templates/monitoring/users.html
Normal file
14
templates/monitoring/users.html
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
{{define "content"}}
|
||||||
|
<div id="svelte-app"></div>
|
||||||
|
{{end}}
|
||||||
|
|
||||||
|
{{define "stylesheets"}}
|
||||||
|
<link rel='stylesheet' href='/build/users.css'>
|
||||||
|
{{end}}
|
||||||
|
{{define "javascript"}}
|
||||||
|
<script>
|
||||||
|
const filterPresets = null;
|
||||||
|
const clusterCockpitConfigPromise = Promise.resolve({});
|
||||||
|
</script>
|
||||||
|
<script src='/build/users.js'></script>
|
||||||
|
{{end}}
|
56
templates/templates.go
Normal file
56
templates/templates.go
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
package templates
|
||||||
|
|
||||||
|
import (
|
||||||
|
"html/template"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
var templatesDir string
|
||||||
|
var debugMode bool = true
|
||||||
|
var templates map[string]*template.Template = map[string]*template.Template{}
|
||||||
|
|
||||||
|
type Page struct {
|
||||||
|
Title string
|
||||||
|
Login *LoginPage
|
||||||
|
FilterPresets map[string]interface{}
|
||||||
|
Infos map[string]interface{}
|
||||||
|
Config map[string]interface{}
|
||||||
|
}
|
||||||
|
|
||||||
|
type LoginPage struct {
|
||||||
|
Error string
|
||||||
|
Info string
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
templatesDir = "./templates/"
|
||||||
|
base := template.Must(template.ParseFiles(templatesDir + "base.html"))
|
||||||
|
files := []string{
|
||||||
|
"home.html", "404.html", "login.html",
|
||||||
|
"monitoring/jobs.html", "monitoring/job.html",
|
||||||
|
"monitoring/users.html", "monitoring/user.html",
|
||||||
|
"monitoring/analysis.html",
|
||||||
|
"monitoring/systems.html",
|
||||||
|
"monitoring/node.html",
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, file := range files {
|
||||||
|
templates[file] = template.Must(template.Must(base.Clone()).ParseFiles(templatesDir + file))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Render(rw http.ResponseWriter, r *http.Request, file string, page *Page) {
|
||||||
|
t, ok := templates[file]
|
||||||
|
if !ok {
|
||||||
|
panic("templates must be predefinied!")
|
||||||
|
}
|
||||||
|
|
||||||
|
if debugMode {
|
||||||
|
t = template.Must(template.ParseFiles(templatesDir+"base.html", templatesDir+file))
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := t.Execute(rw, page); err != nil {
|
||||||
|
log.Printf("template error: %s\n", err.Error())
|
||||||
|
}
|
||||||
|
}
|
40
utils/add-job.mjs
Normal file
40
utils/add-job.mjs
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import fetch from 'node-fetch'
|
||||||
|
|
||||||
|
// Just for testing
|
||||||
|
|
||||||
|
const job = {
|
||||||
|
jobId: 123,
|
||||||
|
user: 'lou',
|
||||||
|
project: 'testproj',
|
||||||
|
cluster: 'heidi',
|
||||||
|
partition: 'default',
|
||||||
|
arrayJobId: 0,
|
||||||
|
numNodes: 1,
|
||||||
|
numHwthreads: 8,
|
||||||
|
numAcc: 0,
|
||||||
|
exclusive: 1,
|
||||||
|
monitoringStatus: 1,
|
||||||
|
smt: 1,
|
||||||
|
jobState: 'running',
|
||||||
|
duration: 2*60*60,
|
||||||
|
tags: [],
|
||||||
|
resources: [
|
||||||
|
{
|
||||||
|
hostname: 'heidi',
|
||||||
|
hwthreads: [0, 1, 2, 3, 4, 5, 6, 7]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
metaData: null,
|
||||||
|
startTime: 1641427200
|
||||||
|
}
|
||||||
|
|
||||||
|
fetch('http://localhost:8080/api/jobs/start_job/', {
|
||||||
|
method: 'POST',
|
||||||
|
body: JSON.stringify(job),
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': 'Bearer eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJpc19hZG1pbiI6dHJ1ZSwiaXNfYXBpIjpmYWxzZSwic3ViIjoibG91In0.nY6dCgLSdm7zXz1xPkrb_3JnnUCgExXeXcrTlAAySs4p72VKJhmzzC1RxgkJE26l8tDYUilM-o-urzlaqK5aDA'
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.then(res => res.status == 200 ? res.json() : res.text())
|
||||||
|
.then(res => console.log(res))
|
@ -1 +0,0 @@
|
|||||||
{"analysis_view_histogramMetrics":["flops_any","mem_bw","mem_used"],"analysis_view_scatterPlotMetrics":[["flops_any","mem_bw"],["flops_any","cpu_load"],["cpu_load","mem_bw"]],"job_view_nodestats_selectedMetrics":["flops_any","mem_bw","mem_used"],"job_view_polarPlotMetrics":["flops_any","mem_bw","mem_used","net_bw","file_bw"],"job_view_selectedMetrics":["flops_any","mem_bw","mem_used"],"plot_general_colorBackground":true,"plot_general_colorscheme":["#00bfff","#0000ff","#ff00ff","#ff0000","#ff8000","#ffff00","#80ff00"],"plot_general_lineWidth":1,"plot_list_jobsPerPage":10,"plot_list_selectedMetrics":["cpu_load","mem_used","flops_any","mem_bw","clock"],"plot_view_plotsPerRow":4,"plot_view_showPolarplot":true,"plot_view_showRoofline":true,"plot_view_showStatTable":true}
|
|
Loading…
x
Reference in New Issue
Block a user