mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2024-12-25 21:09:05 +01:00
commit
ff24d946fd
4
.env
Normal file
4
.env
Normal file
@ -0,0 +1,4 @@
|
||||
export JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||
export JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||
export SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||
export LDAP_ADMIN_PASSWORD="mashup"
|
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -1,3 +1,3 @@
|
||||
[submodule "frontend"]
|
||||
path = frontend
|
||||
url = git@github.com:ClusterCockpit/cc-svelte-datatable.git
|
||||
url = git@github.com:ClusterCockpit/cc-frontend.git
|
||||
|
18
README.md
18
README.md
@ -1,7 +1,11 @@
|
||||
# ClusterCockpit with a Golang backend (Only supports archived jobs)
|
||||
# ClusterCockpit with a Golang backend
|
||||
|
||||
__*DOES NOT WORK WITH CURRENT FRONTEND*__
|
||||
|
||||
[![Build](https://github.com/ClusterCockpit/cc-jobarchive/actions/workflows/test.yml/badge.svg)](https://github.com/ClusterCockpit/cc-jobarchive/actions/workflows/test.yml)
|
||||
|
||||
Create your job-archive according to [this specification](https://github.com/ClusterCockpit/cc-specifications). At least one cluster with a valid `cluster.json` file is required. Having no jobs in the job-archive at all is fine. You may use the sample job-archive available for download [in cc-docker/develop](https://github.com/ClusterCockpit/cc-docker/tree/develop).
|
||||
|
||||
### Run server
|
||||
|
||||
```sh
|
||||
@ -27,13 +31,23 @@ touch ./var/job.db
|
||||
# This will first initialize the job.db database by traversing all
|
||||
# `meta.json` files in the job-archive. After that, an HTTP server on
|
||||
# the port 8080 will be running. The `--init-db` is only needed the first time.
|
||||
./cc-jobarchive --init-db
|
||||
./cc-jobarchive --init-db --add-user <your-username>:admin:<your-password>
|
||||
|
||||
# Show other options:
|
||||
./cc-jobarchive --help
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
A config file in the JSON format can be provided using `--config` to override the defaults. Look at the beginning of `server.go` for the defaults and consequently the format of the configuration file.
|
||||
|
||||
### Update GraphQL schema
|
||||
|
||||
This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL API. The schema can be found in `./graph/schema.graphqls`. After changing it, you need to run `go run github.com/99designs/gqlgen` which will update `graph/model`. In case new resolvers are needed, they will be inserted into `graph/schema.resolvers.go`, where you will need to implement them.
|
||||
|
||||
### TODO
|
||||
|
||||
- [ ] Documentation
|
||||
- [ ] Write more TODOs
|
||||
- [ ] Caching
|
||||
- [ ] Generate JWTs based on the provided keys
|
||||
|
171
api/openapi.yaml
Normal file
171
api/openapi.yaml
Normal file
@ -0,0 +1,171 @@
|
||||
#
|
||||
# ClusterCockpit's API spec can be exported via:
|
||||
# docker exec -it cc-php php bin/console api:openapi:export --yaml
|
||||
#
|
||||
# This spec is written by hand and hopefully up to date with the API.
|
||||
#
|
||||
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: 'ClusterCockpit REST API'
|
||||
description: 'API for batch job control'
|
||||
version: 0.0.2
|
||||
servers:
|
||||
- url: /
|
||||
description: ''
|
||||
paths:
|
||||
'/api/jobs/{id}':
|
||||
get:
|
||||
operationId: 'getJob'
|
||||
summary: 'Get job resource'
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema: { type: integer }
|
||||
description: 'Database ID (Resource Identifier)'
|
||||
responses:
|
||||
200:
|
||||
description: 'Job resource'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
404:
|
||||
description: 'Resource not found'
|
||||
'/api/jobs/tag_job/{id}':
|
||||
post:
|
||||
operationId: 'tagJob'
|
||||
summary: 'Add a tag to a job'
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema: { type: integer }
|
||||
description: 'Job ID'
|
||||
requestBody:
|
||||
description: 'Array of tags to add'
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Tag'
|
||||
responses:
|
||||
200:
|
||||
description: 'Job resource'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
404:
|
||||
description: 'Job or tag does not exist'
|
||||
400:
|
||||
description: 'Bad request'
|
||||
'/api/jobs/start_job/':
|
||||
post:
|
||||
operationId: 'startJob'
|
||||
summary: 'Add a newly started job'
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
responses:
|
||||
201:
|
||||
description: 'Job successfully started'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
description: 'The database ID assigned to this job'
|
||||
400:
|
||||
description: 'Bad request'
|
||||
422:
|
||||
description: 'The combination of jobId, clusterId and startTime does already exist'
|
||||
'/api/jobs/stop_job/':
|
||||
post:
|
||||
operationId: stopJobViaJobID
|
||||
summary: 'Mark a job as stopped. Which job to stop is specified by the request body.'
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
required: [jobId, cluster, startTime, stopTime]
|
||||
properties:
|
||||
jobId: { type: integer }
|
||||
cluster: { type: string }
|
||||
startTime: { type: integer }
|
||||
stopTime: { type: integer }
|
||||
responses:
|
||||
200:
|
||||
description: 'Job resource'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
400:
|
||||
description: 'Bad request'
|
||||
404:
|
||||
description: 'Resource not found'
|
||||
'/api/jobs/stop_job/{id}':
|
||||
post:
|
||||
operationId: 'stopJobViaDBID'
|
||||
summary: 'Mark a job as stopped.'
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema: { type: integer }
|
||||
description: 'Database ID (Resource Identifier)'
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
required: [stopTime]
|
||||
properties:
|
||||
stopTime: { type: integer }
|
||||
responses:
|
||||
200:
|
||||
description: 'Job resource'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
400:
|
||||
description: 'Bad request'
|
||||
404:
|
||||
description: 'Resource not found'
|
||||
components:
|
||||
schemas:
|
||||
Tag:
|
||||
description: 'A job tag'
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 'Database ID'
|
||||
type:
|
||||
type: string
|
||||
description: 'Tag type'
|
||||
name:
|
||||
type: string
|
||||
description: 'Tag name'
|
||||
Job:
|
||||
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
|
||||
securitySchemes:
|
||||
bearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
security:
|
||||
- bearerAuth: [] # Applies `bearerAuth` globally
|
340
api/rest.go
Normal file
340
api/rest.go
Normal file
@ -0,0 +1,340 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
// RestApi bundles everything the REST handlers mounted by MountRoutes need.
type RestApi struct {
|
||||
// DB is the database the handlers query and update directly.
DB *sqlx.DB
|
||||
// Resolver is the GraphQL resolver; getJob and tagJob use it to load jobs and their tags.
Resolver *graph.Resolver
|
||||
// AsyncArchiving makes stopJob answer the request before the job is archived.
AsyncArchiving bool
|
||||
// MachineStateDir is the directory holding the per-host machine state files.
// An empty string disables the machine_state endpoints.
MachineStateDir string
|
||||
}
|
||||
|
||||
func (api *RestApi) MountRoutes(r *mux.Router) {
|
||||
r = r.PathPrefix("/api").Subrouter()
|
||||
r.StrictSlash(true)
|
||||
|
||||
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/jobs/stop_job/", api.stopJob).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/jobs/stop_job/{id}", api.stopJob).Methods(http.MethodPost, http.MethodPut)
|
||||
|
||||
r.HandleFunc("/jobs/{id}", api.getJob).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch)
|
||||
|
||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet)
|
||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost)
|
||||
}
|
||||
|
||||
// StartJobApiRespone is the JSON body startJob sends back after a job was inserted.
type StartJobApiRespone struct {
|
||||
// DBID is the database id of the newly inserted job row.
DBID int64 `json:"id"`
|
||||
}
|
||||
|
||||
// StopJobApiRequest is the JSON body expected by stopJob.
// NOTE(review): api/openapi.yaml declares `jobId` as an integer while this
// struct decodes it into a *string - confirm which one clients really send.
type StopJobApiRequest struct {
|
||||
// JobId, ClusterId and StartTime are optional.
|
||||
// They are only used if no database id was provided.
|
||||
JobId *string `json:"jobId"`
|
||||
Cluster *string `json:"cluster"`
|
||||
StartTime *int64 `json:"startTime"`
|
||||
|
||||
// Payload
|
||||
// StopTime is compared against the job's start time as a Unix timestamp.
StopTime int64 `json:"stopTime"`
|
||||
// State is the final job state to store; stopJob validates it when it is not empty.
State schema.JobState `json:"jobState"`
|
||||
}
|
||||
|
||||
// TagJobApiRequest is the JSON body expected by tagJob: a list of tags,
// each identified by its name and type, that must already exist in the `tag` table.
type TagJobApiRequest []*struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
func (api *RestApi) getJob(rw http.ResponseWriter, r *http.Request) {
|
||||
id := mux.Vars(r)["id"]
|
||||
|
||||
job, err := api.Resolver.Query().Job(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.Resolver.Job().Tags(r.Context(), job)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
}
|
||||
|
||||
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
id := mux.Vars(r)["id"]
|
||||
job, err := api.Resolver.Query().Job(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.Resolver.Job().Tags(r.Context(), job)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
var req TagJobApiRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
for _, tag := range req {
|
||||
var tagId int64
|
||||
if err := sq.Select("id").From("tag").
|
||||
Where("tag.tag_type = ?", tag.Type).Where("tag.tag_name = ?", tag.Name).
|
||||
RunWith(api.DB).QueryRow().Scan(&tagId); err != nil {
|
||||
http.Error(rw, fmt.Sprintf("the tag '%s:%s' does not exist", tag.Type, tag.Name), http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
if _, err := api.DB.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, job.ID, tagId); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags = append(job.Tags, &schema.Tag{
|
||||
ID: tagId,
|
||||
Type: tag.Type,
|
||||
Name: tag.Name,
|
||||
})
|
||||
}
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
}
|
||||
|
||||
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
req := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if config.GetClusterConfig(req.Cluster) == nil {
|
||||
http.Error(rw, fmt.Sprintf("cluster '%s' does not exist", req.Cluster), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if len(req.Resources) == 0 || len(req.User) == 0 || req.NumNodes == 0 {
|
||||
http.Error(rw, "required fields are missing", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if combination of (job_id, cluster_id, start_time) already exists:
|
||||
rows, err := api.DB.Query(`SELECT job.id FROM job WHERE job.job_id = ? AND job.cluster = ? AND job.start_time = ?`,
|
||||
req.JobID, req.Cluster, req.StartTime)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if rows.Next() {
|
||||
var id int64 = -1
|
||||
rows.Scan(&id)
|
||||
http.Error(rw, fmt.Sprintf("a job with that job_id, cluster_id and start_time already exists (database id: %d)", id), http.StatusUnprocessableEntity)
|
||||
return
|
||||
}
|
||||
|
||||
req.RawResources, err = json.Marshal(req.Resources)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
res, err := api.DB.NamedExec(`INSERT INTO job (
|
||||
job_id, user, project, cluster, partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, resources, meta_data
|
||||
) VALUES (
|
||||
:job_id, :user, :project, :cluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :resources, :meta_data
|
||||
);`, req)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d\n", id, req.Cluster, req.JobID, req.User, req.StartTime)
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusCreated)
|
||||
json.NewEncoder(rw).Encode(StartJobApiRespone{
|
||||
DBID: id,
|
||||
})
|
||||
}
|
||||
|
||||
func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
|
||||
req := StopJobApiRequest{}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
var err error
|
||||
var sql string
|
||||
var args []interface{}
|
||||
id, ok := mux.Vars(r)["id"]
|
||||
if ok {
|
||||
sql, args, err = sq.Select(schema.JobColumns...).From("job").Where("job.id = ?", id).ToSql()
|
||||
} else {
|
||||
sql, args, err = sq.Select(schema.JobColumns...).From("job").
|
||||
Where("job.job_id = ?", req.JobId).
|
||||
Where("job.cluster = ?", req.Cluster).
|
||||
Where("job.start_time = ?", req.StartTime).ToSql()
|
||||
}
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
job, err := schema.ScanJob(api.DB.QueryRowx(sql, args...))
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != schema.JobStateRunning {
|
||||
http.Error(rw, "stop_time must be larger than start_time and only running jobs can be stopped", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if req.State != "" && !req.State.Valid() {
|
||||
http.Error(rw, fmt.Sprintf("invalid job state: '%s'", req.State), http.StatusBadRequest)
|
||||
return
|
||||
} else {
|
||||
req.State = schema.JobStateCompleted
|
||||
}
|
||||
|
||||
doArchiving := func(job *schema.Job, ctx context.Context) error {
|
||||
job.Duration = int32(req.StopTime - job.StartTime.Unix())
|
||||
jobMeta, err := metricdata.ArchiveJob(job, ctx)
|
||||
if err != nil {
|
||||
log.Printf("archiving job (dbid: %d) failed: %s\n", job.ID, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
stmt := sq.Update("job").
|
||||
Set("job_state", req.State).
|
||||
Set("duration", job.Duration).
|
||||
Where("job.id = ?", job.ID)
|
||||
|
||||
for metric, stats := range jobMeta.Statistics {
|
||||
switch metric {
|
||||
case "flops_any":
|
||||
stmt = stmt.Set("flops_any_avg", stats.Avg)
|
||||
case "mem_used":
|
||||
stmt = stmt.Set("mem_used_max", stats.Max)
|
||||
case "mem_bw":
|
||||
stmt = stmt.Set("mem_bw_avg", stats.Avg)
|
||||
case "load":
|
||||
stmt = stmt.Set("load_avg", stats.Avg)
|
||||
case "net_bw":
|
||||
stmt = stmt.Set("net_bw_avg", stats.Avg)
|
||||
case "file_bw":
|
||||
stmt = stmt.Set("file_bw_avg", stats.Avg)
|
||||
}
|
||||
}
|
||||
|
||||
sql, args, err := stmt.ToSql()
|
||||
if err != nil {
|
||||
log.Printf("archiving job (dbid: %d) failed: %s\n", job.ID, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := api.DB.Exec(sql, args...); err != nil {
|
||||
log.Printf("archiving job (dbid: %d) failed: %s\n", job.ID, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
log.Printf("job stopped and archived (dbid: %d)\n", job.ID)
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s\n", job.ID, job.Cluster, job.JobID, job.User, job.StartTime)
|
||||
if api.AsyncArchiving {
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
go doArchiving(job, context.Background())
|
||||
} else {
|
||||
err := doArchiving(job, r.Context())
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
} else {
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||
if api.MachineStateDir == "" {
|
||||
http.Error(rw, "not enabled", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
vars := mux.Vars(r)
|
||||
cluster := vars["cluster"]
|
||||
host := vars["host"]
|
||||
dir := filepath.Join(api.MachineStateDir, cluster)
|
||||
if err := os.MkdirAll(dir, 0755); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
filename := filepath.Join(dir, fmt.Sprintf("%s.json", host))
|
||||
f, err := os.Create(filename)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if _, err := io.Copy(f, r.Body); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
rw.WriteHeader(http.StatusCreated)
|
||||
}
|
||||
|
||||
func (api *RestApi) getMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||
if api.MachineStateDir == "" {
|
||||
http.Error(rw, "not enabled", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
vars := mux.Vars(r)
|
||||
filename := filepath.Join(api.MachineStateDir, vars["cluster"], fmt.Sprintf("%s.json", vars["host"]))
|
||||
|
||||
// Sets the content-type and 'Last-Modified' Header and so on automatically
|
||||
http.ServeFile(rw, r, filename)
|
||||
}
|
339
auth/auth.go
Normal file
339
auth/auth.go
Normal file
@ -0,0 +1,339 @@
|
||||
package auth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/templates"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/golang-jwt/jwt/v4"
|
||||
"github.com/gorilla/sessions"
|
||||
"github.com/jmoiron/sqlx"
|
||||
"golang.org/x/crypto/bcrypt"
|
||||
)
|
||||
|
||||
// User describes an authenticated (or to be authenticated) user.
type User struct {
|
||||
Username string
|
||||
// Password holds the password hash as stored in the `user` table (see FetchUserFromDB).
Password string
|
||||
Name string
|
||||
// IsAdmin is set for users with the role "ROLE_ADMIN" or the JWT claim "is_admin".
IsAdmin bool
|
||||
// IsAPIUser is set for users with the role "ROLE_API" or the JWT claim "is_api".
IsAPIUser bool
|
||||
// ViaLdap mirrors the `ldap` column of the `user` table; Login authenticates such users via LDAP.
ViaLdap bool
|
||||
Email string
|
||||
}
|
||||
|
||||
// ContextKey is the type of the keys this package stores in request contexts.
type ContextKey string
|
||||
|
||||
// ContextUserKey is the context key under which Auth stores the *User (read it with GetUser).
const ContextUserKey ContextKey = "user"
|
||||
|
||||
// JwtPublicKey and JwtPrivateKey are filled by Init from the environment
// variables JWT_PUBLIC_KEY and JWT_PRIVATE_KEY; both stay nil if either is unset.
var JwtPublicKey ed25519.PublicKey
|
||||
var JwtPrivateKey ed25519.PrivateKey
|
||||
|
||||
// sessionStore is created by Init and holds the cookie based login sessions.
var sessionStore *sessions.CookieStore
|
||||
|
||||
func Init(db *sqlx.DB, ldapConfig *LdapConfig) error {
|
||||
_, err := db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS user (
|
||||
username varchar(255) PRIMARY KEY,
|
||||
password varchar(255) DEFAULT NULL,
|
||||
ldap tinyint DEFAULT 0,
|
||||
name varchar(255) DEFAULT NULL,
|
||||
roles varchar(255) DEFAULT NULL,
|
||||
email varchar(255) DEFAULT NULL);`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sessKey := os.Getenv("SESSION_KEY")
|
||||
if sessKey == "" {
|
||||
log.Println("warning: environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||
bytes := make([]byte, 32)
|
||||
if _, err := rand.Read(bytes); err != nil {
|
||||
return err
|
||||
}
|
||||
sessionStore = sessions.NewCookieStore(bytes)
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sessionStore = sessions.NewCookieStore(bytes)
|
||||
}
|
||||
|
||||
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
|
||||
if pubKey == "" || privKey == "" {
|
||||
log.Println("warning: environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
JwtPublicKey = ed25519.PublicKey(bytes)
|
||||
bytes, err = base64.StdEncoding.DecodeString(privKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
JwtPrivateKey = ed25519.PrivateKey(bytes)
|
||||
}
|
||||
|
||||
if ldapConfig != nil {
|
||||
if err := initLdap(ldapConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// arg must be formated like this: "<username>:[admin]:<password>"
|
||||
func AddUserToDB(db *sqlx.DB, arg string) error {
|
||||
parts := strings.SplitN(arg, ":", 3)
|
||||
if len(parts) != 3 || len(parts[0]) == 0 || len(parts[2]) == 0 || !(len(parts[1]) == 0 || parts[1] == "admin") {
|
||||
return errors.New("invalid argument format")
|
||||
}
|
||||
|
||||
password, err := bcrypt.GenerateFromPassword([]byte(parts[2]), bcrypt.DefaultCost)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
roles := "[]"
|
||||
if parts[1] == "admin" {
|
||||
roles = "[\"ROLE_ADMIN\"]"
|
||||
}
|
||||
if parts[1] == "api" {
|
||||
roles = "[\"ROLE_API\"]"
|
||||
}
|
||||
|
||||
_, err = sq.Insert("user").Columns("username", "password", "roles").Values(parts[0], string(password), roles).RunWith(db).Exec()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
log.Printf("new user '%s' added (roles: %s)\n", parts[0], roles)
|
||||
return nil
|
||||
}
|
||||
|
||||
func DelUserFromDB(db *sqlx.DB, username string) error {
|
||||
_, err := db.Exec(`DELETE FROM user WHERE user.username = ?`, username)
|
||||
return err
|
||||
}
|
||||
|
||||
func FetchUserFromDB(db *sqlx.DB, username string) (*User, error) {
|
||||
user := &User{Username: username}
|
||||
var hashedPassword, name, rawRoles, email sql.NullString
|
||||
if err := sq.Select("password", "ldap", "name", "roles", "email").From("user").
|
||||
Where("user.username = ?", username).RunWith(db).
|
||||
QueryRow().Scan(&hashedPassword, &user.ViaLdap, &name, &rawRoles, &email); err != nil {
|
||||
return nil, fmt.Errorf("user '%s' not found (%s)", username, err.Error())
|
||||
}
|
||||
|
||||
user.Password = hashedPassword.String
|
||||
user.Name = name.String
|
||||
user.Email = email.String
|
||||
var roles []string
|
||||
if rawRoles.Valid {
|
||||
json.Unmarshal([]byte(rawRoles.String), &roles)
|
||||
}
|
||||
for _, role := range roles {
|
||||
switch role {
|
||||
case "ROLE_ADMIN":
|
||||
user.IsAdmin = true
|
||||
case "ROLE_API":
|
||||
user.IsAPIUser = true
|
||||
}
|
||||
}
|
||||
|
||||
return user, nil
|
||||
}
|
||||
|
||||
// Handle a POST request that should log the user in,
|
||||
// starting a new session.
|
||||
func Login(db *sqlx.DB) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
username, password := r.FormValue("username"), r.FormValue("password")
|
||||
user, err := FetchUserFromDB(db, username)
|
||||
if err == nil && user.ViaLdap && ldapAuthEnabled {
|
||||
err = loginViaLdap(user, password)
|
||||
} else if err == nil && !user.ViaLdap && user.Password != "" {
|
||||
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(password)); e != nil {
|
||||
err = fmt.Errorf("user '%s' provided the wrong password (%s)", username, e.Error())
|
||||
}
|
||||
} else {
|
||||
err = errors.New("could not authenticate user")
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Printf("login failed: %s\n", err.Error())
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
templates.Render(rw, r, "login.html", &templates.Page{
|
||||
Title: "Login failed",
|
||||
Login: &templates.LoginPage{
|
||||
Error: "Username or password incorrect",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
session, err := sessionStore.New(r, "session")
|
||||
if err != nil {
|
||||
log.Printf("session creation failed: %s\n", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
session.Values["username"] = user.Username
|
||||
session.Values["is_admin"] = user.IsAdmin
|
||||
if err := sessionStore.Save(r, rw, session); err != nil {
|
||||
log.Printf("session save failed: %s\n", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("login successfull: user: %#v\n", user)
|
||||
http.Redirect(rw, r, "/", http.StatusTemporaryRedirect)
|
||||
})
|
||||
}
|
||||
|
||||
// ErrTokenInvalid is returned by authViaToken if a token was sent but could not be parsed or validated.
var ErrTokenInvalid error = errors.New("invalid token")
|
||||
|
||||
func authViaToken(r *http.Request) (*User, error) {
|
||||
if JwtPublicKey == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
rawtoken := r.Header.Get("X-Auth-Token")
|
||||
if rawtoken == "" {
|
||||
rawtoken = r.Header.Get("Authorization")
|
||||
prefix := "Bearer "
|
||||
if !strings.HasPrefix(rawtoken, prefix) {
|
||||
return nil, nil
|
||||
}
|
||||
rawtoken = rawtoken[len(prefix):]
|
||||
}
|
||||
|
||||
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
|
||||
if t.Method != jwt.SigningMethodEdDSA {
|
||||
return nil, errors.New("only Ed25519/EdDSA supported")
|
||||
}
|
||||
return JwtPublicKey, nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, ErrTokenInvalid
|
||||
}
|
||||
|
||||
if err := token.Claims.Valid(); err != nil {
|
||||
return nil, ErrTokenInvalid
|
||||
}
|
||||
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
sub, _ := claims["sub"].(string)
|
||||
isAdmin, _ := claims["is_admin"].(bool)
|
||||
isAPIUser, _ := claims["is_api"].(bool)
|
||||
return &User{
|
||||
Username: sub,
|
||||
IsAdmin: isAdmin,
|
||||
IsAPIUser: isAPIUser,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Authenticate the user and put a User object in the
|
||||
// context of the request. If authentication fails,
|
||||
// do not continue but send client to the login screen.
|
||||
func Auth(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := authViaToken(r)
|
||||
if err == ErrTokenInvalid {
|
||||
log.Printf("authentication failed: invalid token\n")
|
||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
if user != nil {
|
||||
ctx := context.WithValue(r.Context(), ContextUserKey, user)
|
||||
next.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
|
||||
session, err := sessionStore.Get(r, "session")
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if session.IsNew {
|
||||
log.Printf("authentication failed: no session or jwt found\n")
|
||||
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
templates.Render(rw, r, "login.html", &templates.Page{
|
||||
Title: "Authentication failed",
|
||||
Login: &templates.LoginPage{
|
||||
Error: "No valid session or JWT provided",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
ctx := context.WithValue(r.Context(), ContextUserKey, &User{
|
||||
Username: session.Values["username"].(string),
|
||||
IsAdmin: session.Values["is_admin"].(bool),
|
||||
})
|
||||
next.ServeHTTP(rw, r.WithContext(ctx))
|
||||
})
|
||||
}
|
||||
|
||||
// Generate a new JWT that can be used for authentication
|
||||
func ProvideJWT(user *User) (string, error) {
|
||||
if JwtPrivateKey == nil {
|
||||
return "", errors.New("environment variable 'JWT_PUBLIC_KEY' not set")
|
||||
}
|
||||
|
||||
tok := jwt.NewWithClaims(jwt.SigningMethodEdDSA, jwt.MapClaims{
|
||||
"sub": user.Username,
|
||||
"is_admin": user.IsAdmin,
|
||||
"is_api": user.IsAPIUser,
|
||||
})
|
||||
|
||||
return tok.SignedString(JwtPrivateKey)
|
||||
}
|
||||
|
||||
func GetUser(ctx context.Context) *User {
|
||||
x := ctx.Value(ContextUserKey)
|
||||
if x == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return x.(*User)
|
||||
}
|
||||
|
||||
// Clears the session cookie
|
||||
func Logout(rw http.ResponseWriter, r *http.Request) {
|
||||
session, err := sessionStore.Get(r, "session")
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if !session.IsNew {
|
||||
session.Options.MaxAge = -1
|
||||
if err := sessionStore.Save(r, rw, session); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
templates.Render(rw, r, "login.html", &templates.Page{
|
||||
Title: "Logout successful",
|
||||
Login: &templates.LoginPage{
|
||||
Info: "Logout successful",
|
||||
},
|
||||
})
|
||||
}
|
183
auth/ldap.go
Normal file
183
auth/ldap.go
Normal file
@ -0,0 +1,183 @@
|
||||
package auth
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/go-ldap/ldap/v3"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
// LdapConfig holds the LDAP settings as read from the JSON configuration.
type LdapConfig struct {
|
||||
// Url is passed to ldap.DialURL when a new connection is opened.
Url string `json:"url"`
|
||||
UserBase string `json:"user_base"`
|
||||
// SearchDN is the DN connections are bound to, using the LDAP_ADMIN_PASSWORD.
SearchDN string `json:"search_dn"`
|
||||
// UserBind is the DN template for user logins; "{username}" is replaced by the username.
UserBind string `json:"user_bind"`
|
||||
UserFilter string `json:"user_filter"`
|
||||
// TLS makes new connections issue StartTLS before binding.
TLS bool `json:"tls"`
|
||||
}
|
||||
|
||||
// ldapAuthEnabled, ldapConfig and ldapAdminPassword are set by initLdap.
var ldapAuthEnabled bool = false
|
||||
var ldapConfig *LdapConfig
|
||||
// ldapAdminPassword is read from the environment variable LDAP_ADMIN_PASSWORD.
var ldapAdminPassword string
|
||||
|
||||
func initLdap(config *LdapConfig) error {
|
||||
ldapAdminPassword = os.Getenv("LDAP_ADMIN_PASSWORD")
|
||||
if ldapAdminPassword == "" {
|
||||
log.Println("warning: environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync or authentication will not work)")
|
||||
}
|
||||
|
||||
ldapConfig = config
|
||||
ldapAuthEnabled = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// ldapConnectionsLock guards ldapConnections, the list of idle LDAP
// connections handed out by getLdapConnection and refilled by releaseConnection.
var ldapConnectionsLock sync.Mutex
|
||||
var ldapConnections []*ldap.Conn = []*ldap.Conn{}
|
||||
|
||||
// TODO: Add a connection pool or something like
|
||||
// that so that connections can be reused/cached.
|
||||
func getLdapConnection() (*ldap.Conn, error) {
|
||||
ldapConnectionsLock.Lock()
|
||||
n := len(ldapConnections)
|
||||
if n > 0 {
|
||||
conn := ldapConnections[n-1]
|
||||
ldapConnections = ldapConnections[:n-1]
|
||||
ldapConnectionsLock.Unlock()
|
||||
return conn, nil
|
||||
}
|
||||
ldapConnectionsLock.Unlock()
|
||||
|
||||
conn, err := ldap.DialURL(ldapConfig.Url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ldapConfig.TLS {
|
||||
if err := conn.StartTLS(&tls.Config{InsecureSkipVerify: true}); err != nil {
|
||||
conn.Close()
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if err := conn.Bind(ldapConfig.SearchDN, ldapAdminPassword); err != nil {
|
||||
conn.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func releaseConnection(conn *ldap.Conn) {
|
||||
// Re-bind to the user we can run queries with
|
||||
if err := conn.Bind(ldapConfig.SearchDN, ldapAdminPassword); err != nil {
|
||||
conn.Close()
|
||||
log.Printf("ldap error: %s", err.Error())
|
||||
}
|
||||
|
||||
ldapConnectionsLock.Lock()
|
||||
defer ldapConnectionsLock.Unlock()
|
||||
|
||||
n := len(ldapConnections)
|
||||
if n > 2 {
|
||||
conn.Close()
|
||||
return
|
||||
}
|
||||
|
||||
ldapConnections = append(ldapConnections, conn)
|
||||
}
|
||||
|
||||
func loginViaLdap(user *User, password string) error {
|
||||
l, err := getLdapConnection()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer releaseConnection(l)
|
||||
|
||||
userDn := strings.Replace(ldapConfig.UserBind, "{username}", user.Username, -1)
|
||||
if err := l.Bind(userDn, password); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
user.ViaLdap = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete users where user.ldap is 1 and that do not show up in the ldap search results.
|
||||
// Add users to the users table that are new in the ldap search results.
|
||||
func SyncWithLDAP(db *sqlx.DB) error {
|
||||
if !ldapAuthEnabled {
|
||||
return errors.New("ldap not enabled")
|
||||
}
|
||||
|
||||
const IN_DB int = 1
|
||||
const IN_LDAP int = 2
|
||||
const IN_BOTH int = 3
|
||||
|
||||
users := map[string]int{}
|
||||
rows, err := db.Query(`SELECT username FROM user WHERE user.ldap = 1`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var username string
|
||||
if err := rows.Scan(&username); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
users[username] = IN_DB
|
||||
}
|
||||
|
||||
l, err := getLdapConnection()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer releaseConnection(l)
|
||||
|
||||
ldapResults, err := l.Search(ldap.NewSearchRequest(
|
||||
ldapConfig.UserBase, ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
|
||||
ldapConfig.UserFilter, []string{"dn", "uid", "gecos"}, nil))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
newnames := map[string]string{}
|
||||
for _, entry := range ldapResults.Entries {
|
||||
username := entry.GetAttributeValue("uid")
|
||||
if username == "" {
|
||||
return errors.New("no attribute 'uid'")
|
||||
}
|
||||
|
||||
_, ok := users[username]
|
||||
if !ok {
|
||||
users[username] = IN_LDAP
|
||||
newnames[username] = entry.GetAttributeValue("gecos")
|
||||
} else {
|
||||
users[username] = IN_BOTH
|
||||
}
|
||||
}
|
||||
|
||||
for username, where := range users {
|
||||
if where == IN_DB {
|
||||
fmt.Printf("ldap-sync: remove '%s' (does not show up in LDAP anymore)\n", username)
|
||||
if _, err := db.Exec(`DELETE FROM user WHERE user.username = ?`, username); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if where == IN_LDAP {
|
||||
name := newnames[username]
|
||||
fmt.Printf("ldap-sync: add '%s' (name: '%s', roles: [], ldap: true)\n", username, name)
|
||||
if _, err := db.Exec(`INSERT INTO user (username, ldap, name, roles) VALUES (?, ?, ?, ?)`,
|
||||
username, 1, name, "[]"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
152
config/config.go
152
config/config.go
@ -3,82 +3,160 @@ package config
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
var db *sqlx.DB
|
||||
var lock sync.RWMutex
|
||||
var config map[string]interface{}
|
||||
var uiDefaults map[string]interface{}
|
||||
|
||||
var Clusters []*model.Cluster
|
||||
|
||||
const configFilePath string = "./var/ui.config.json"
|
||||
|
||||
func init() {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
|
||||
bytes, err := os.ReadFile(configFilePath)
|
||||
func Init(usersdb *sqlx.DB, authEnabled bool, uiConfig map[string]interface{}, jobArchive string) error {
|
||||
db = usersdb
|
||||
uiDefaults = uiConfig
|
||||
entries, err := os.ReadDir(jobArchive)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bytes, &config); err != nil {
|
||||
log.Fatal(err)
|
||||
Clusters = []*model.Cluster{}
|
||||
for _, de := range entries {
|
||||
bytes, err := os.ReadFile(filepath.Join(jobArchive, de.Name(), "cluster.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var cluster model.Cluster
|
||||
if err := json.Unmarshal(bytes, &cluster); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cluster.FilterRanges.StartTime.To.IsZero() {
|
||||
cluster.FilterRanges.StartTime.To = time.Unix(0, 0)
|
||||
}
|
||||
|
||||
if cluster.Name != de.Name() {
|
||||
return fmt.Errorf("the file '%s/cluster.json' contains the clusterId '%s'", de.Name(), cluster.Name)
|
||||
}
|
||||
|
||||
Clusters = append(Clusters, &cluster)
|
||||
}
|
||||
|
||||
if authEnabled {
|
||||
_, err := db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS configuration (
|
||||
username varchar(255),
|
||||
key varchar(255),
|
||||
value varchar(255),
|
||||
PRIMARY KEY (username, key),
|
||||
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Call this function to change the current configuration.
|
||||
// `value` must be valid JSON. This function is thread-safe.
|
||||
func UpdateConfig(key, value string, ctx context.Context) error {
|
||||
var v interface{}
|
||||
if err := json.Unmarshal([]byte(value), &v); err != nil {
|
||||
return err
|
||||
// Return the personalised UI config for the currently authenticated
|
||||
// user or return the plain default config.
|
||||
func GetUIConfig(r *http.Request) (map[string]interface{}, error) {
|
||||
lock.RLock()
|
||||
config := make(map[string]interface{}, len(uiDefaults))
|
||||
for k, v := range uiDefaults {
|
||||
config[k] = v
|
||||
}
|
||||
lock.RUnlock()
|
||||
|
||||
user := auth.GetUser(r.Context())
|
||||
if user == nil {
|
||||
return config, nil
|
||||
}
|
||||
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
|
||||
config[key] = v
|
||||
bytes, err := json.Marshal(config)
|
||||
rows, err := db.Query(`SELECT key, value FROM configuration WHERE configuration.username = ?`, user.Username)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := os.WriteFile(configFilePath, bytes, 0644); err != nil {
|
||||
for rows.Next() {
|
||||
var key, rawval string
|
||||
if err := rows.Scan(&key, &rawval); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
config[key] = val
|
||||
}
|
||||
|
||||
return config, nil
|
||||
}
|
||||
|
||||
// If the context does not have a user, update the global ui configuration without persisting it!
|
||||
// If there is a (authenticated) user, update only his configuration.
|
||||
func UpdateConfig(key, value string, ctx context.Context) error {
|
||||
user := auth.GetUser(ctx)
|
||||
if user == nil {
|
||||
lock.RLock()
|
||||
defer lock.RUnlock()
|
||||
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(value), &val); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
uiDefaults[key] = val
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, err := db.Exec(`REPLACE INTO configuration (username, key, value) VALUES (?, ?, ?)`,
|
||||
user.Username, key, value); err != nil {
|
||||
log.Printf("db.Exec: %s\n", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// http.HandlerFunc compatible function that serves the current configuration as JSON
|
||||
func ServeConfig(rw http.ResponseWriter, r *http.Request) {
|
||||
lock.RLock()
|
||||
defer lock.RUnlock()
|
||||
|
||||
rw.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(rw).Encode(config); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
}
|
||||
}
|
||||
|
||||
func GetClusterConfig(cluster string) *model.Cluster {
|
||||
for _, c := range Clusters {
|
||||
if c.ClusterID == cluster {
|
||||
if c.Name == cluster {
|
||||
return c
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetPartition(cluster, partition string) *model.Partition {
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
for _, p := range c.Partitions {
|
||||
if p.Name == partition {
|
||||
return p
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetMetricConfig(cluster, metric string) *model.MetricConfig {
|
||||
for _, c := range Clusters {
|
||||
if c.ClusterID == cluster {
|
||||
if c.Name == cluster {
|
||||
for _, m := range c.MetricConfig {
|
||||
if m.Name == metric {
|
||||
return m
|
||||
|
2
frontend
2
frontend
@ -1 +1 @@
|
||||
Subproject commit b487af3496b46942d9848337bc2821575a1390b2
|
||||
Subproject commit cc48461a810dbd3565000150fc99332743de92ba
|
8
go.mod
8
go.mod
@ -5,9 +5,15 @@ go 1.15
|
||||
require (
|
||||
github.com/99designs/gqlgen v0.13.0
|
||||
github.com/Masterminds/squirrel v1.5.1
|
||||
github.com/go-ldap/ldap/v3 v3.4.1
|
||||
github.com/golang-jwt/jwt/v4 v4.1.0
|
||||
github.com/gorilla/handlers v1.5.1
|
||||
github.com/gorilla/mux v1.6.1
|
||||
github.com/gorilla/mux v1.8.0
|
||||
github.com/gorilla/sessions v1.2.1
|
||||
github.com/jmoiron/sqlx v1.3.1
|
||||
github.com/mattn/go-sqlite3 v1.14.6
|
||||
github.com/stretchr/testify v1.5.1 // indirect
|
||||
github.com/vektah/gqlparser/v2 v2.1.0
|
||||
golang.org/x/crypto v0.0.0-20211117183948-ae814b36b871
|
||||
gopkg.in/yaml.v2 v2.3.0 // indirect
|
||||
)
|
||||
|
32
go.sum
32
go.sum
@ -1,5 +1,7 @@
|
||||
github.com/99designs/gqlgen v0.13.0 h1:haLTcUp3Vwp80xMVEg5KRNwzfUrgFdRmtBY8fuB8scA=
|
||||
github.com/99designs/gqlgen v0.13.0/go.mod h1:NV130r6f4tpRWuAI+zsrSdooO/eWUv+Gyyoi3rEfXIk=
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c h1:/IBSNwUN8+eKzUzbJPqhK839ygXJ82sde8x3ogr6R28=
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/Masterminds/squirrel v1.5.1 h1:kWAKlLLJFxZG7N2E0mBMNWVp5AuUX+JUrnhFN74Eg+w=
|
||||
github.com/Masterminds/squirrel v1.5.1/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
|
||||
@ -19,16 +21,26 @@ github.com/dgryski/trifles v0.0.0-20190318185328-a8d75aae118c h1:TUuUh0Xgj97tLMN
|
||||
github.com/dgryski/trifles v0.0.0-20190318185328-a8d75aae118c/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
|
||||
github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ=
|
||||
github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.1 h1:pDbRAunXzIUXfx4CB2QJFv5IuPiuoW+sWvr/Us009o8=
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
|
||||
github.com/go-chi/chi v3.3.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
|
||||
github.com/go-ldap/ldap/v3 v3.4.1 h1:fU/0xli6HY02ocbMuozHAYsaHLcnkLjvho2r5a34BUU=
|
||||
github.com/go-ldap/ldap/v3 v3.4.1/go.mod h1:iYS1MdmrmceOJ1QOTnRXrIs7i3kloqtmGQjRvjKpyMg=
|
||||
github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs=
|
||||
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
|
||||
github.com/gogo/protobuf v1.0.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||
github.com/gorilla/context v0.0.0-20160226214623-1ea25387ff6f h1:9oNbS1z4rVpbnkHBdPZU4jo9bSmrLpII768arSyMFgk=
|
||||
github.com/golang-jwt/jwt/v4 v4.1.0 h1:XUgk2Ex5veyVFVeLm0xhusUTQybEbexJXrvPNOKkSY0=
|
||||
github.com/golang-jwt/jwt/v4 v4.1.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
|
||||
github.com/gorilla/context v0.0.0-20160226214623-1ea25387ff6f/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
|
||||
github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
|
||||
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
|
||||
github.com/gorilla/mux v1.6.1 h1:KOwqsTYZdeuMacU7CxjMNYEKeBvLbxW+psodrbcEa3A=
|
||||
github.com/gorilla/mux v1.6.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
||||
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
|
||||
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
|
||||
github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ=
|
||||
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
|
||||
github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7FsgI=
|
||||
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
|
||||
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
|
||||
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=
|
||||
@ -73,8 +85,9 @@ github.com/shurcooL/vfsgen v0.0.0-20180121065927-ffb13db8def0/go.mod h1:TrYk7fJV
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
|
||||
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
|
||||
github.com/urfave/cli/v2 v2.1.1 h1:Qt8FeAtxE/vfdrLmR3rxR6JRE0RoVmbXu8+6kZtYU4k=
|
||||
github.com/urfave/cli/v2 v2.1.1/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ=
|
||||
github.com/vektah/dataloaden v0.2.1-0.20190515034641-a19b9a6e7c9e/go.mod h1:/HUdMve7rvxZma+2ZELQeNh88+003LL7Pf/CZ089j8U=
|
||||
@ -82,16 +95,26 @@ github.com/vektah/gqlparser/v2 v2.1.0 h1:uiKJ+T5HMGGQM2kRKQ8Pxw8+Zq9qhhZhz/lieYv
|
||||
github.com/vektah/gqlparser/v2 v2.1.0/go.mod h1:SyUiHgLATUR8BiYURfTirrTcGpcE+4XkV2se04Px1Ms=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.0.0-20211117183948-ae814b36b871 h1:/pEO3GD/ABYAjuakUS6xSEmmlyVS4kxBNkeA9tLJiTI=
|
||||
golang.org/x/crypto v0.0.0-20211117183948-ae814b36b871/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190125232054-d66bd3c5d5a6/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190515012406-7d7faa4812bd/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
||||
golang.org/x/tools v0.0.0-20200114235610-7ae403b6b589 h1:rjUrONFu4kLchcZTfp3/96bR8bW8dIa8uz3cR5n0cgM=
|
||||
@ -101,7 +124,8 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
|
||||
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
|
||||
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
sourcegraph.com/sourcegraph/appdash v0.0.0-20180110180208-2cc67fd64755/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU=
|
||||
sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67/go.mod h1:L5q+DGLGOQFpo1snNEkLOJT2d1YTW66rWNzatr3He1k=
|
||||
|
22
gqlgen.yml
22
gqlgen.yml
@ -55,17 +55,19 @@ models:
|
||||
- github.com/99designs/gqlgen/graphql.Int64
|
||||
- github.com/99designs/gqlgen/graphql.Int32
|
||||
Job:
|
||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.Job"
|
||||
fields:
|
||||
tags:
|
||||
resolver: true
|
||||
JobMetric:
|
||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.JobMetric"
|
||||
JobMetricSeries:
|
||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricSeries"
|
||||
JobMetricStatistics:
|
||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricStatistics"
|
||||
NullableFloat:
|
||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.Float"
|
||||
JobMetricScope:
|
||||
model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricScope"
|
||||
NullableFloat: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.Float" }
|
||||
MetricScope: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricScope" }
|
||||
JobStatistics: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.JobStatistics" }
|
||||
Tag: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.Tag" }
|
||||
Resource: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.Resource" }
|
||||
JobState: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.JobState" }
|
||||
JobMetric: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.JobMetric" }
|
||||
Series: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.Series" }
|
||||
MetricStatistics: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.MetricStatistics" }
|
||||
StatsSeries: { model: "github.com/ClusterCockpit/cc-jobarchive/schema.StatsSeries" }
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,9 +1,17 @@
|
||||
package model
|
||||
|
||||
// Go look at `gqlgen.yml` and the schema package for other non-generated models.
|
||||
type Cluster struct {
|
||||
Name string `json:"name"`
|
||||
MetricConfig []*MetricConfig `json:"metricConfig"`
|
||||
FilterRanges *FilterRanges `json:"filterRanges"`
|
||||
Partitions []*Partition `json:"partitions"`
|
||||
|
||||
type JobTag struct {
|
||||
ID string `json:"id" db:"id"`
|
||||
TagType string `json:"tagType" db:"tag_type"`
|
||||
TagName string `json:"tagName" db:"tag_name"`
|
||||
// NOT part of the API:
|
||||
MetricDataRepository *MetricDataRepository `json:"metricDataRepository"`
|
||||
}
|
||||
|
||||
type MetricDataRepository struct {
|
||||
Kind string `json:"kind"`
|
||||
Url string `json:"url"`
|
||||
Token string `json:"token"`
|
||||
}
|
||||
|
@ -11,17 +11,10 @@ import (
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
)
|
||||
|
||||
type Cluster struct {
|
||||
ClusterID string `json:"clusterID"`
|
||||
ProcessorType string `json:"processorType"`
|
||||
SocketsPerNode int `json:"socketsPerNode"`
|
||||
CoresPerSocket int `json:"coresPerSocket"`
|
||||
ThreadsPerCore int `json:"threadsPerCore"`
|
||||
FlopRateScalar int `json:"flopRateScalar"`
|
||||
FlopRateSimd int `json:"flopRateSimd"`
|
||||
MemoryBandwidth int `json:"memoryBandwidth"`
|
||||
MetricConfig []*MetricConfig `json:"metricConfig"`
|
||||
FilterRanges *FilterRanges `json:"filterRanges"`
|
||||
type Accelerator struct {
|
||||
ID string `json:"id"`
|
||||
Type string `json:"type"`
|
||||
Model string `json:"model"`
|
||||
}
|
||||
|
||||
type FilterRanges struct {
|
||||
@ -50,41 +43,20 @@ type IntRangeOutput struct {
|
||||
To int `json:"to"`
|
||||
}
|
||||
|
||||
type Job struct {
|
||||
ID string `json:"id"`
|
||||
JobID string `json:"jobId"`
|
||||
UserID string `json:"userId"`
|
||||
ProjectID string `json:"projectId"`
|
||||
ClusterID string `json:"clusterId"`
|
||||
StartTime time.Time `json:"startTime"`
|
||||
Duration int `json:"duration"`
|
||||
NumNodes int `json:"numNodes"`
|
||||
Nodes []string `json:"nodes"`
|
||||
HasProfile bool `json:"hasProfile"`
|
||||
State JobState `json:"state"`
|
||||
Tags []*JobTag `json:"tags"`
|
||||
LoadAvg *float64 `json:"loadAvg"`
|
||||
MemUsedMax *float64 `json:"memUsedMax"`
|
||||
FlopsAnyAvg *float64 `json:"flopsAnyAvg"`
|
||||
MemBwAvg *float64 `json:"memBwAvg"`
|
||||
NetBwAvg *float64 `json:"netBwAvg"`
|
||||
FileBwAvg *float64 `json:"fileBwAvg"`
|
||||
}
|
||||
|
||||
type JobFilter struct {
|
||||
Tags []string `json:"tags"`
|
||||
JobID *StringInput `json:"jobId"`
|
||||
UserID *StringInput `json:"userId"`
|
||||
ProjectID *StringInput `json:"projectId"`
|
||||
ClusterID *StringInput `json:"clusterId"`
|
||||
Duration *IntRange `json:"duration"`
|
||||
NumNodes *IntRange `json:"numNodes"`
|
||||
StartTime *TimeRange `json:"startTime"`
|
||||
IsRunning *bool `json:"isRunning"`
|
||||
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg"`
|
||||
MemBwAvg *FloatRange `json:"memBwAvg"`
|
||||
LoadAvg *FloatRange `json:"loadAvg"`
|
||||
MemUsedMax *FloatRange `json:"memUsedMax"`
|
||||
Tags []string `json:"tags"`
|
||||
JobID *StringInput `json:"jobId"`
|
||||
User *StringInput `json:"user"`
|
||||
Project *StringInput `json:"project"`
|
||||
Cluster *StringInput `json:"cluster"`
|
||||
Duration *IntRange `json:"duration"`
|
||||
NumNodes *IntRange `json:"numNodes"`
|
||||
StartTime *TimeRange `json:"startTime"`
|
||||
State []schema.JobState `json:"state"`
|
||||
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg"`
|
||||
MemBwAvg *FloatRange `json:"memBwAvg"`
|
||||
LoadAvg *FloatRange `json:"loadAvg"`
|
||||
MemUsedMax *FloatRange `json:"memUsedMax"`
|
||||
}
|
||||
|
||||
type JobMetricWithName struct {
|
||||
@ -93,10 +65,10 @@ type JobMetricWithName struct {
|
||||
}
|
||||
|
||||
type JobResultList struct {
|
||||
Items []*Job `json:"items"`
|
||||
Offset *int `json:"offset"`
|
||||
Limit *int `json:"limit"`
|
||||
Count *int `json:"count"`
|
||||
Items []*schema.Job `json:"items"`
|
||||
Offset *int `json:"offset"`
|
||||
Limit *int `json:"limit"`
|
||||
Count *int `json:"count"`
|
||||
}
|
||||
|
||||
type JobsStatistics struct {
|
||||
@ -110,13 +82,14 @@ type JobsStatistics struct {
|
||||
}
|
||||
|
||||
type MetricConfig struct {
|
||||
Name string `json:"name"`
|
||||
Unit string `json:"unit"`
|
||||
Sampletime int `json:"sampletime"`
|
||||
Peak int `json:"peak"`
|
||||
Normal int `json:"normal"`
|
||||
Caution int `json:"caution"`
|
||||
Alert int `json:"alert"`
|
||||
Name string `json:"name"`
|
||||
Unit string `json:"unit"`
|
||||
Scope schema.MetricScope `json:"scope"`
|
||||
Timestep int `json:"timestep"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
}
|
||||
|
||||
type MetricFootprints struct {
|
||||
@ -124,6 +97,16 @@ type MetricFootprints struct {
|
||||
Footprints []schema.Float `json:"footprints"`
|
||||
}
|
||||
|
||||
type NodeMetric struct {
|
||||
Name string `json:"name"`
|
||||
Data []schema.Float `json:"data"`
|
||||
}
|
||||
|
||||
type NodeMetrics struct {
|
||||
ID string `json:"id"`
|
||||
Metrics []*NodeMetric `json:"metrics"`
|
||||
}
|
||||
|
||||
type OrderByInput struct {
|
||||
Field string `json:"field"`
|
||||
Order SortDirectionEnum `json:"order"`
|
||||
@ -134,6 +117,18 @@ type PageRequest struct {
|
||||
Page int `json:"page"`
|
||||
}
|
||||
|
||||
type Partition struct {
|
||||
Name string `json:"name"`
|
||||
ProcessorType string `json:"processorType"`
|
||||
SocketsPerNode int `json:"socketsPerNode"`
|
||||
CoresPerSocket int `json:"coresPerSocket"`
|
||||
ThreadsPerCore int `json:"threadsPerCore"`
|
||||
FlopRateScalar int `json:"flopRateScalar"`
|
||||
FlopRateSimd int `json:"flopRateSimd"`
|
||||
MemoryBandwidth int `json:"memoryBandwidth"`
|
||||
Topology *Topology `json:"topology"`
|
||||
}
|
||||
|
||||
type StringInput struct {
|
||||
Eq *string `json:"eq"`
|
||||
Contains *string `json:"contains"`
|
||||
@ -151,6 +146,15 @@ type TimeRangeOutput struct {
|
||||
To time.Time `json:"to"`
|
||||
}
|
||||
|
||||
type Topology struct {
|
||||
Node []int `json:"node"`
|
||||
Socket [][]int `json:"socket"`
|
||||
MemoryDomain [][]int `json:"memoryDomain"`
|
||||
Die [][]int `json:"die"`
|
||||
Core [][]int `json:"core"`
|
||||
Accelerators []*Accelerator `json:"accelerators"`
|
||||
}
|
||||
|
||||
type Aggregate string
|
||||
|
||||
const (
|
||||
@ -194,47 +198,6 @@ func (e Aggregate) MarshalGQL(w io.Writer) {
|
||||
fmt.Fprint(w, strconv.Quote(e.String()))
|
||||
}
|
||||
|
||||
type JobState string
|
||||
|
||||
const (
|
||||
JobStateRunning JobState = "running"
|
||||
JobStateCompleted JobState = "completed"
|
||||
)
|
||||
|
||||
var AllJobState = []JobState{
|
||||
JobStateRunning,
|
||||
JobStateCompleted,
|
||||
}
|
||||
|
||||
func (e JobState) IsValid() bool {
|
||||
switch e {
|
||||
case JobStateRunning, JobStateCompleted:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (e JobState) String() string {
|
||||
return string(e)
|
||||
}
|
||||
|
||||
func (e *JobState) UnmarshalGQL(v interface{}) error {
|
||||
str, ok := v.(string)
|
||||
if !ok {
|
||||
return fmt.Errorf("enums must be strings")
|
||||
}
|
||||
|
||||
*e = JobState(str)
|
||||
if !e.IsValid() {
|
||||
return fmt.Errorf("%s is not a valid JobState", str)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e JobState) MarshalGQL(w io.Writer) {
|
||||
fmt.Fprint(w, strconv.Quote(e.String()))
|
||||
}
|
||||
|
||||
type SortDirectionEnum string
|
||||
|
||||
const (
|
||||
|
@ -1,12 +1,15 @@
|
||||
package graph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
@ -19,31 +22,10 @@ type Resolver struct {
|
||||
DB *sqlx.DB
|
||||
}
|
||||
|
||||
var JobTableCols []string = []string{"id", "job_id", "user_id", "project_id", "cluster_id", "start_time", "duration", "job_state", "num_nodes", "node_list", "flops_any_avg", "mem_bw_avg", "net_bw_avg", "file_bw_avg", "load_avg"}
|
||||
|
||||
type Scannable interface {
|
||||
Scan(dest ...interface{}) error
|
||||
}
|
||||
|
||||
// Helper function for scanning jobs with the `jobTableCols` columns selected.
|
||||
func ScanJob(row Scannable) (*model.Job, error) {
|
||||
job := &model.Job{HasProfile: true}
|
||||
|
||||
var nodeList string
|
||||
if err := row.Scan(
|
||||
&job.ID, &job.JobID, &job.UserID, &job.ProjectID, &job.ClusterID,
|
||||
&job.StartTime, &job.Duration, &job.State, &job.NumNodes, &nodeList,
|
||||
&job.FlopsAnyAvg, &job.MemBwAvg, &job.NetBwAvg, &job.FileBwAvg, &job.LoadAvg); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
job.Nodes = strings.Split(nodeList, ",")
|
||||
return job, nil
|
||||
}
|
||||
|
||||
// Helper function for the `jobs` GraphQL-Query. Is also used elsewhere when a list of jobs is needed.
|
||||
func (r *Resolver) queryJobs(filters []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) ([]*model.Job, int, error) {
|
||||
query := sq.Select(JobTableCols...).From("job")
|
||||
func (r *Resolver) queryJobs(ctx context.Context, filters []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) ([]*schema.Job, int, error) {
|
||||
query := sq.Select(schema.JobColumns...).From("job")
|
||||
query = securityCheck(ctx, query)
|
||||
|
||||
if order != nil {
|
||||
field := toSnakeCase(order.Field)
|
||||
@ -67,55 +49,68 @@ func (r *Resolver) queryJobs(filters []*model.JobFilter, page *model.PageRequest
|
||||
query = buildWhereClause(f, query)
|
||||
}
|
||||
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
sql, args, err := query.ToSql()
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
jobs := make([]*model.Job, 0, 50)
|
||||
rows, err := r.DB.Queryx(sql, args...)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
jobs := make([]*schema.Job, 0, 50)
|
||||
for rows.Next() {
|
||||
job, err := ScanJob(rows)
|
||||
job, err := schema.ScanJob(rows)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
|
||||
// count all jobs:
|
||||
query = sq.Select("count(*)").From("job")
|
||||
for _, f := range filters {
|
||||
query = buildWhereClause(f, query)
|
||||
}
|
||||
rows, err = query.RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var count int
|
||||
rows.Next()
|
||||
if err := rows.Scan(&count); err != nil {
|
||||
if err := query.RunWith(r.DB).Scan(&count); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
return jobs, count, nil
|
||||
}
|
||||
|
||||
// Build a sq.SelectBuilder out of a model.JobFilter.
|
||||
func securityCheck(ctx context.Context, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
val := ctx.Value(auth.ContextUserKey)
|
||||
if val == nil {
|
||||
return query
|
||||
}
|
||||
|
||||
user := val.(*auth.User)
|
||||
if user.IsAdmin {
|
||||
return query
|
||||
}
|
||||
|
||||
return query.Where("job.user_id = ?", user.Username)
|
||||
}
|
||||
|
||||
// Build a sq.SelectBuilder out of a model.JobFilter.
|
||||
func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if filter.Tags != nil {
|
||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where("jobtag.tag_id IN ?", filter.Tags)
|
||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags})
|
||||
}
|
||||
if filter.JobID != nil {
|
||||
query = buildStringCondition("job.job_id", filter.JobID, query)
|
||||
}
|
||||
if filter.UserID != nil {
|
||||
query = buildStringCondition("job.user_id", filter.UserID, query)
|
||||
if filter.User != nil {
|
||||
query = buildStringCondition("job.user", filter.User, query)
|
||||
}
|
||||
if filter.ProjectID != nil {
|
||||
query = buildStringCondition("job.project_id", filter.ProjectID, query)
|
||||
if filter.Project != nil {
|
||||
query = buildStringCondition("job.project", filter.Project, query)
|
||||
}
|
||||
if filter.ClusterID != nil {
|
||||
query = buildStringCondition("job.cluster_id", filter.ClusterID, query)
|
||||
if filter.Cluster != nil {
|
||||
query = buildStringCondition("job.cluster", filter.Cluster, query)
|
||||
}
|
||||
if filter.StartTime != nil {
|
||||
query = buildTimeCondition("job.start_time", filter.StartTime, query)
|
||||
@ -123,12 +118,13 @@ func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
||||
if filter.Duration != nil {
|
||||
query = buildIntCondition("job.duration", filter.Duration, query)
|
||||
}
|
||||
if filter.IsRunning != nil {
|
||||
if *filter.IsRunning {
|
||||
query = query.Where("job.job_state = 'running'")
|
||||
} else {
|
||||
query = query.Where("job.job_state = 'completed'")
|
||||
if filter.State != nil {
|
||||
states := make([]string, len(filter.State))
|
||||
for i, val := range filter.State {
|
||||
states[i] = string(val)
|
||||
}
|
||||
|
||||
query = query.Where(sq.Eq{"job.job_state": states})
|
||||
}
|
||||
if filter.NumNodes != nil {
|
||||
query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
|
||||
@ -173,20 +169,23 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
|
||||
return query.Where(field+" = ?", *cond.Eq)
|
||||
}
|
||||
if cond.StartsWith != nil {
|
||||
return query.Where(field+"LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
|
||||
return query.Where(field+" LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
|
||||
}
|
||||
if cond.EndsWith != nil {
|
||||
return query.Where(field+"LIKE ?", fmt.Sprint("%", *cond.StartsWith))
|
||||
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.EndsWith))
|
||||
}
|
||||
if cond.Contains != nil {
|
||||
return query.Where(field+"LIKE ?", fmt.Sprint("%", *cond.StartsWith, "%"))
|
||||
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
||||
var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
|
||||
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||
|
||||
func toSnakeCase(str string) string {
|
||||
matchFirstCap := regexp.MustCompile("(.)([A-Z][a-z]+)")
|
||||
matchAllCap := regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||
str = strings.ReplaceAll(str, "'", "")
|
||||
str = strings.ReplaceAll(str, "\\", "")
|
||||
snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
|
||||
snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
|
||||
return strings.ToLower(snake)
|
||||
|
@ -1,40 +1,38 @@
|
||||
scalar Time
|
||||
scalar NullableFloat
|
||||
scalar MetricScope
|
||||
scalar JobState
|
||||
|
||||
type Job {
|
||||
id: ID! # Database ID, unique
|
||||
jobId: String! # ID given to the job by the cluster scheduler
|
||||
userId: String! # Username
|
||||
projectId: String! # Project
|
||||
clusterId: String! # Name of the cluster this job was running on
|
||||
startTime: Time! # RFC3339 formated string
|
||||
duration: Int! # For running jobs, the time it has already run
|
||||
numNodes: Int! # Number of nodes this job was running on
|
||||
nodes: [String!]! # List of hostnames
|
||||
hasProfile: Boolean! # TODO: Could be removed?
|
||||
state: JobState! # State of the job
|
||||
tags: [JobTag!]! # List of tags this job has
|
||||
|
||||
# Will be null for running jobs.
|
||||
loadAvg: Float
|
||||
memUsedMax: Float
|
||||
flopsAnyAvg: Float
|
||||
memBwAvg: Float
|
||||
netBwAvg: Float
|
||||
fileBwAvg: Float
|
||||
}
|
||||
|
||||
# TODO: Extend by more possible states?
|
||||
enum JobState {
|
||||
running
|
||||
completed
|
||||
}
|
||||
|
||||
type JobTag {
|
||||
id: ID! # Database ID, unique
|
||||
tagType: String! # Type
|
||||
tagName: String! # Name
|
||||
id: ID!
|
||||
jobId: Int!
|
||||
user: String!
|
||||
project: String!
|
||||
cluster: String!
|
||||
startTime: Time!
|
||||
duration: Int!
|
||||
numNodes: Int!
|
||||
numHWThreads: Int!
|
||||
numAcc: Int!
|
||||
SMT: Int!
|
||||
exclusive: Int!
|
||||
partition: String!
|
||||
arrayJobId: Int!
|
||||
monitoringStatus: Int!
|
||||
state: JobState!
|
||||
tags: [Tag!]!
|
||||
resources: [Resource!]!
|
||||
}
|
||||
|
||||
type Cluster {
|
||||
clusterID: String!
|
||||
name: String!
|
||||
metricConfig: [MetricConfig!]!
|
||||
filterRanges: FilterRanges!
|
||||
partitions: [Partition!]!
|
||||
}
|
||||
|
||||
type Partition {
|
||||
name: String!
|
||||
processorType: String!
|
||||
socketsPerNode: Int!
|
||||
coresPerSocket: Int!
|
||||
@ -42,37 +40,46 @@ type Cluster {
|
||||
flopRateScalar: Int!
|
||||
flopRateSimd: Int!
|
||||
memoryBandwidth: Int!
|
||||
metricConfig: [MetricConfig!]!
|
||||
filterRanges: FilterRanges!
|
||||
topology: Topology!
|
||||
}
|
||||
|
||||
type Topology {
|
||||
node: [Int!]
|
||||
socket: [[Int!]!]
|
||||
memoryDomain: [[Int!]!]
|
||||
die: [[Int!]!]
|
||||
core: [[Int!]!]
|
||||
accelerators: [Accelerator!]
|
||||
}
|
||||
|
||||
type Accelerator {
|
||||
id: String!
|
||||
type: String!
|
||||
model: String!
|
||||
}
|
||||
|
||||
type MetricConfig {
|
||||
name: String!
|
||||
unit: String!
|
||||
sampletime: Int!
|
||||
peak: Int!
|
||||
normal: Int!
|
||||
caution: Int!
|
||||
alert: Int!
|
||||
}
|
||||
|
||||
type JobMetric {
|
||||
name: String!
|
||||
unit: String!
|
||||
scope: JobMetricScope!
|
||||
scope: MetricScope!
|
||||
timestep: Int!
|
||||
series: [JobMetricSeries!]!
|
||||
peak: Float!
|
||||
normal: Float!
|
||||
caution: Float!
|
||||
alert: Float!
|
||||
}
|
||||
|
||||
type JobMetricSeries {
|
||||
node_id: String!
|
||||
statistics: JobMetricStatistics
|
||||
data: [NullableFloat!]!
|
||||
type Tag {
|
||||
id: ID!
|
||||
type: String!
|
||||
name: String!
|
||||
}
|
||||
|
||||
type JobMetricStatistics {
|
||||
avg: Float!
|
||||
min: Float!
|
||||
max: Float!
|
||||
type Resource {
|
||||
hostname: String!
|
||||
hwthreads: [Int!]
|
||||
accelerators: [Int!]
|
||||
configuration: String
|
||||
}
|
||||
|
||||
type JobMetricWithName {
|
||||
@ -80,6 +87,33 @@ type JobMetricWithName {
|
||||
metric: JobMetric!
|
||||
}
|
||||
|
||||
type JobMetric {
|
||||
unit: String!
|
||||
scope: MetricScope!
|
||||
timestep: Int!
|
||||
series: [Series!]
|
||||
statisticsSeries: StatsSeries
|
||||
}
|
||||
|
||||
type Series {
|
||||
hostname: String!
|
||||
id: Int
|
||||
statistics: MetricStatistics
|
||||
data: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type MetricStatistics {
|
||||
avg: NullableFloat!
|
||||
min: NullableFloat!
|
||||
max: NullableFloat!
|
||||
}
|
||||
|
||||
type StatsSeries {
|
||||
mean: [NullableFloat!]!
|
||||
min: [NullableFloat!]!
|
||||
max: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type MetricFootprints {
|
||||
name: String!
|
||||
footprints: [NullableFloat!]!
|
||||
@ -87,38 +121,43 @@ type MetricFootprints {
|
||||
|
||||
enum Aggregate { USER, PROJECT, CLUSTER }
|
||||
|
||||
type NodeMetric {
|
||||
name: String!
|
||||
data: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type NodeMetrics {
|
||||
id: String!
|
||||
metrics: [NodeMetric!]!
|
||||
}
|
||||
|
||||
type Query {
|
||||
clusters: [Cluster!]! # List of all clusters
|
||||
tags: [JobTag!]! # List of all tags
|
||||
tags: [Tag!]! # List of all tags
|
||||
|
||||
job(id: ID!): Job
|
||||
jobMetrics(id: ID!, metrics: [String!]): [JobMetricWithName!]!
|
||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): [MetricFootprints]!
|
||||
|
||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
||||
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]!
|
||||
|
||||
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
||||
|
||||
nodeMetrics(cluster: ID!, nodes: [String!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
||||
}
|
||||
|
||||
type Mutation {
|
||||
createTag(type: String!, name: String!): JobTag!
|
||||
createTag(type: String!, name: String!): Tag!
|
||||
deleteTag(id: ID!): ID!
|
||||
addTagsToJob(job: ID!, tagIds: [ID!]!): [JobTag!]!
|
||||
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [JobTag!]!
|
||||
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
|
||||
updateConfiguration(name: String!, value: String!): String
|
||||
}
|
||||
|
||||
type IntRangeOutput {
|
||||
from: Int!
|
||||
to: Int!
|
||||
}
|
||||
|
||||
type TimeRangeOutput {
|
||||
from: Time!
|
||||
to: Time!
|
||||
}
|
||||
type IntRangeOutput { from: Int!, to: Int! }
|
||||
type TimeRangeOutput { from: Time!, to: Time! }
|
||||
|
||||
type FilterRanges {
|
||||
duration: IntRangeOutput!
|
||||
@ -129,13 +168,13 @@ type FilterRanges {
|
||||
input JobFilter {
|
||||
tags: [ID!]
|
||||
jobId: StringInput
|
||||
userId: StringInput
|
||||
projectId: StringInput
|
||||
clusterId: StringInput
|
||||
user: StringInput
|
||||
project: StringInput
|
||||
cluster: StringInput
|
||||
duration: IntRange
|
||||
numNodes: IntRange
|
||||
startTime: TimeRange
|
||||
isRunning: Boolean
|
||||
state: [JobState!]
|
||||
flopsAnyAvg: FloatRange
|
||||
memBwAvg: FloatRange
|
||||
loadAvg: FloatRange
|
||||
@ -159,20 +198,9 @@ input StringInput {
|
||||
endsWith: String
|
||||
}
|
||||
|
||||
input IntRange {
|
||||
from: Int!
|
||||
to: Int!
|
||||
}
|
||||
|
||||
input FloatRange {
|
||||
from: Float!
|
||||
to: Float!
|
||||
}
|
||||
|
||||
input TimeRange {
|
||||
from: Time
|
||||
to: Time
|
||||
}
|
||||
input IntRange { from: Int!, to: Int! }
|
||||
input FloatRange { from: Float!, to: Float! }
|
||||
input TimeRange { from: Time, to: Time }
|
||||
|
||||
type JobResultList {
|
||||
items: [Job!]!
|
||||
@ -200,7 +228,3 @@ input PageRequest {
|
||||
itemsPerPage: Int!
|
||||
page: Int!
|
||||
}
|
||||
|
||||
scalar Time
|
||||
scalar NullableFloat
|
||||
scalar JobMetricScope
|
||||
|
@ -5,42 +5,41 @@ package graph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
func (r *jobResolver) Tags(ctx context.Context, obj *model.Job) ([]*model.JobTag, error) {
|
||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||
query := sq.
|
||||
Select("tag.id", "tag.tag_type", "tag.tag_name").
|
||||
From("tag").
|
||||
Join("jobtag ON jobtag.tag_id = tag.id").
|
||||
Where("jobtag.job_id = ?", obj.ID)
|
||||
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
sql, args, err := query.ToSql()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
tags := make([]*model.JobTag, 0)
|
||||
for rows.Next() {
|
||||
var tag model.JobTag
|
||||
if err := rows.Scan(&tag.ID, &tag.TagType, &tag.TagName); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags = append(tags, &tag)
|
||||
tags := make([]*schema.Tag, 0)
|
||||
if err := r.DB.Select(&tags, sql, args...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*model.JobTag, error) {
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
|
||||
res, err := r.DB.Exec("INSERT INTO tag (tag_type, tag_name) VALUES ($1, $2)", typeArg, name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -51,7 +50,7 @@ func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name s
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &model.JobTag{ID: strconv.FormatInt(id, 10), TagType: typeArg, TagName: name}, nil
|
||||
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
|
||||
}
|
||||
|
||||
func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
|
||||
@ -59,7 +58,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
|
||||
panic(fmt.Errorf("not implemented"))
|
||||
}
|
||||
|
||||
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*model.JobTag, error) {
|
||||
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||
jid, err := strconv.Atoi(job)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -76,7 +75,9 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
||||
}
|
||||
}
|
||||
|
||||
tags, err := r.Job().Tags(ctx, &model.Job{ID: job})
|
||||
dummyJob := schema.Job{}
|
||||
dummyJob.ID = int64(jid)
|
||||
tags, err := r.Job().Tags(ctx, &dummyJob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -89,7 +90,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
||||
return tags, metricdata.UpdateTags(jobObj, tags)
|
||||
}
|
||||
|
||||
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*model.JobTag, error) {
|
||||
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||
jid, err := strconv.Atoi(job)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -106,7 +107,9 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
||||
}
|
||||
}
|
||||
|
||||
tags, err := r.Job().Tags(ctx, &model.Job{ID: job})
|
||||
dummyJob := schema.Job{}
|
||||
dummyJob.ID = int64(jid)
|
||||
tags, err := r.Job().Tags(ctx, &dummyJob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -131,46 +134,53 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*model.Cluster, error)
|
||||
return config.Clusters, nil
|
||||
}
|
||||
|
||||
func (r *queryResolver) Tags(ctx context.Context) ([]*model.JobTag, error) {
|
||||
rows, err := sq.Select("id", "tag_type", "tag_name").From("tag").RunWith(r.DB).Query()
|
||||
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
||||
sql, args, err := sq.Select("id", "tag_type", "tag_name").From("tag").ToSql()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
tags := make([]*model.JobTag, 0)
|
||||
for rows.Next() {
|
||||
var tag model.JobTag
|
||||
if err := rows.Scan(&tag.ID, &tag.TagType, &tag.TagName); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags = append(tags, &tag)
|
||||
tags := make([]*schema.Tag, 0)
|
||||
if err := r.DB.Select(&tags, sql, args...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
func (r *queryResolver) Job(ctx context.Context, id string) (*model.Job, error) {
|
||||
return ScanJob(sq.Select(JobTableCols...).From("job").Where("job.id = ?", id).RunWith(r.DB).QueryRow())
|
||||
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
|
||||
query := sq.Select(schema.JobColumns...).From("job").Where("job.id = ?", id)
|
||||
query = securityCheck(ctx, query)
|
||||
sql, args, err := query.ToSql()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return schema.ScanJob(r.DB.QueryRowx(sql, args...))
|
||||
}
|
||||
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string) ([]*model.JobMetricWithName, error) {
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
|
||||
job, err := r.Query().Job(ctx, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := metricdata.LoadData(job, metrics, ctx)
|
||||
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := []*model.JobMetricWithName{}
|
||||
for name, md := range data {
|
||||
res = append(res, &model.JobMetricWithName{
|
||||
Name: name,
|
||||
Metric: md,
|
||||
})
|
||||
for scope, metric := range md {
|
||||
if metric.Scope != schema.MetricScope(scope) {
|
||||
panic("WTF?")
|
||||
}
|
||||
|
||||
res = append(res, &model.JobMetricWithName{
|
||||
Name: name,
|
||||
Metric: metric,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return res, err
|
||||
@ -181,7 +191,7 @@ func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobF
|
||||
}
|
||||
|
||||
func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) (*model.JobResultList, error) {
|
||||
jobs, count, err := r.queryJobs(filter, page, order)
|
||||
jobs, count, err := r.queryJobs(ctx, filter, page, order)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -197,6 +207,36 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
|
||||
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
|
||||
}
|
||||
|
||||
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
|
||||
user := auth.GetUser(ctx)
|
||||
if user != nil && !user.IsAdmin {
|
||||
return nil, errors.New("you need to be an administrator for this query")
|
||||
}
|
||||
|
||||
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, from.Unix(), to.Unix(), ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := make([]*model.NodeMetrics, 0, len(data))
|
||||
for node, metrics := range data {
|
||||
nodeMetrics := make([]*model.NodeMetric, 0, len(metrics))
|
||||
for metric, data := range metrics {
|
||||
nodeMetrics = append(nodeMetrics, &model.NodeMetric{
|
||||
Name: metric,
|
||||
Data: data,
|
||||
})
|
||||
}
|
||||
|
||||
res = append(res, &model.NodeMetrics{
|
||||
ID: node,
|
||||
Metrics: nodeMetrics,
|
||||
})
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Job returns generated.JobResolver implementation.
|
||||
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
||||
|
||||
|
122
graph/stats.go
122
graph/stats.go
@ -3,6 +3,7 @@ package graph
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
@ -16,9 +17,9 @@ import (
|
||||
|
||||
// GraphQL validation should make sure that no unknown values can be specified.
|
||||
var groupBy2column = map[model.Aggregate]string{
|
||||
model.AggregateUser: "job.user_id",
|
||||
model.AggregateProject: "job.project_id",
|
||||
model.AggregateCluster: "job.cluster_id",
|
||||
model.AggregateUser: "job.user",
|
||||
model.AggregateProject: "job.project",
|
||||
model.AggregateCluster: "job.cluster",
|
||||
}
|
||||
|
||||
// Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||
@ -28,52 +29,59 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
|
||||
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster (and between the partitions of a cluster), so we need to explicitly loop over those.
|
||||
for _, cluster := range config.Clusters {
|
||||
corehoursCol := fmt.Sprintf("SUM(job.duration * job.num_nodes * %d * %d) / 3600", cluster.SocketsPerNode, cluster.CoresPerSocket)
|
||||
var query sq.SelectBuilder
|
||||
if groupBy == nil {
|
||||
query = sq.Select(
|
||||
"''",
|
||||
"COUNT(job.id)",
|
||||
"SUM(job.duration) / 3600",
|
||||
corehoursCol,
|
||||
).From("job").Where("job.cluster_id = ?", cluster.ClusterID)
|
||||
} else {
|
||||
col := groupBy2column[*groupBy]
|
||||
query = sq.Select(
|
||||
col,
|
||||
"COUNT(job.id)",
|
||||
"SUM(job.duration) / 3600",
|
||||
corehoursCol,
|
||||
).From("job").Where("job.cluster_id = ?", cluster.ClusterID).GroupBy(col)
|
||||
}
|
||||
for _, partition := range cluster.Partitions {
|
||||
corehoursCol := fmt.Sprintf("SUM(job.duration * job.num_nodes * %d * %d) / 3600", partition.SocketsPerNode, partition.CoresPerSocket)
|
||||
var query sq.SelectBuilder
|
||||
if groupBy == nil {
|
||||
query = sq.Select(
|
||||
"''",
|
||||
"COUNT(job.id)",
|
||||
"SUM(job.duration) / 3600",
|
||||
corehoursCol,
|
||||
).From("job")
|
||||
} else {
|
||||
col := groupBy2column[*groupBy]
|
||||
query = sq.Select(
|
||||
col,
|
||||
"COUNT(job.id)",
|
||||
"SUM(job.duration) / 3600",
|
||||
corehoursCol,
|
||||
).From("job").GroupBy(col)
|
||||
}
|
||||
|
||||
for _, f := range filter {
|
||||
query = buildWhereClause(f, query)
|
||||
}
|
||||
query = query.
|
||||
Where("job.cluster = ?", cluster.Name).
|
||||
Where("job.partition = ?", partition.Name)
|
||||
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
query = securityCheck(ctx, query)
|
||||
for _, f := range filter {
|
||||
query = buildWhereClause(f, query)
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id sql.NullString
|
||||
var jobs, walltime, corehours sql.NullInt64
|
||||
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
if s, ok := stats[id.String]; ok {
|
||||
s.TotalJobs += int(jobs.Int64)
|
||||
s.TotalWalltime += int(walltime.Int64)
|
||||
s.TotalCoreHours += int(corehours.Int64)
|
||||
} else {
|
||||
stats[id.String] = &model.JobsStatistics{
|
||||
ID: id.String,
|
||||
TotalJobs: int(jobs.Int64),
|
||||
TotalWalltime: int(walltime.Int64),
|
||||
TotalCoreHours: int(corehours.Int64),
|
||||
for rows.Next() {
|
||||
var id sql.NullString
|
||||
var jobs, walltime, corehours sql.NullInt64
|
||||
if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
if s, ok := stats[id.String]; ok {
|
||||
s.TotalJobs += int(jobs.Int64)
|
||||
s.TotalWalltime += int(walltime.Int64)
|
||||
s.TotalCoreHours += int(corehours.Int64)
|
||||
} else {
|
||||
stats[id.String] = &model.JobsStatistics{
|
||||
ID: id.String,
|
||||
TotalJobs: int(jobs.Int64),
|
||||
TotalWalltime: int(walltime.Int64),
|
||||
TotalCoreHours: int(corehours.Int64),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -82,6 +90,7 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
|
||||
if groupBy == nil {
|
||||
query := sq.Select("COUNT(job.id)").From("job").Where("job.duration < 120")
|
||||
query = securityCheck(ctx, query)
|
||||
for _, f := range filter {
|
||||
query = buildWhereClause(f, query)
|
||||
}
|
||||
@ -91,6 +100,7 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
} else {
|
||||
col := groupBy2column[*groupBy]
|
||||
query := sq.Select(col, "COUNT(job.id)").From("job").Where("job.duration < 120")
|
||||
query = securityCheck(ctx, query)
|
||||
for _, f := range filter {
|
||||
query = buildWhereClause(f, query)
|
||||
}
|
||||
@ -133,12 +143,12 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
|
||||
if histogramsNeeded {
|
||||
var err error
|
||||
stat.HistWalltime, err = r.jobsStatisticsHistogram("ROUND(job.duration / 3600) as value", filter, id, col)
|
||||
stat.HistWalltime, err = r.jobsStatisticsHistogram(ctx, "ROUND(job.duration / 3600) as value", filter, id, col)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stat.HistNumNodes, err = r.jobsStatisticsHistogram("job.num_nodes as value", filter, id, col)
|
||||
stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter, id, col)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -150,8 +160,9 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
|
||||
// `value` must be the column grouped by, but renamed to "value". `id` and `col` can optionally be used
|
||||
// to add a condition to the query of the kind "<col> = <id>".
|
||||
func (r *queryResolver) jobsStatisticsHistogram(value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
|
||||
func (r *queryResolver) jobsStatisticsHistogram(ctx context.Context, value string, filters []*model.JobFilter, id, col string) ([]*model.HistoPoint, error) {
|
||||
query := sq.Select(value, "COUNT(job.id) AS count").From("job")
|
||||
query = securityCheck(ctx, query)
|
||||
for _, f := range filters {
|
||||
query = buildWhereClause(f, query)
|
||||
}
|
||||
@ -179,7 +190,7 @@ func (r *queryResolver) jobsStatisticsHistogram(value string, filters []*model.J
|
||||
|
||||
// Helper function for the rooflineHeatmap GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||
func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
||||
jobs, count, err := r.queryJobs(filter, &model.PageRequest{Page: 1, ItemsPerPage: 501}, nil)
|
||||
jobs, count, err := r.queryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: 501}, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -195,14 +206,21 @@ func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilte
|
||||
}
|
||||
|
||||
for _, job := range jobs {
|
||||
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, ctx)
|
||||
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
flops, membw := jobdata["flops_any"], jobdata["mem_bw"]
|
||||
if flops == nil && membw == nil {
|
||||
return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %s", job.ID)
|
||||
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
|
||||
if flops_ == nil && membw_ == nil {
|
||||
return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
||||
}
|
||||
|
||||
flops, ok1 := flops_["node"]
|
||||
membw, ok2 := membw_["node"]
|
||||
if !ok1 || !ok2 {
|
||||
// TODO/FIXME:
|
||||
return nil, errors.New("todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
|
||||
}
|
||||
|
||||
for n := 0; n < len(flops.Series); n++ {
|
||||
@ -232,7 +250,7 @@ func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilte
|
||||
|
||||
// Helper function for the jobsFootprints GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.MetricFootprints, error) {
|
||||
jobs, count, err := r.queryJobs(filter, &model.PageRequest{Page: 1, ItemsPerPage: 501}, nil)
|
||||
jobs, count, err := r.queryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: 501}, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
235
init-db.go
235
init-db.go
@ -2,18 +2,66 @@ package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
const JOBS_DB_SCHEMA string = `
|
||||
DROP TABLE IF EXISTS job;
|
||||
DROP TABLE IF EXISTS tag;
|
||||
DROP TABLE IF EXISTS jobtag;
|
||||
|
||||
CREATE TABLE job (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT, -- Not needed in sqlite
|
||||
job_id BIGINT NOT NULL,
|
||||
cluster VARCHAR(255) NOT NULL,
|
||||
start_time TIMESTAMP NOT NULL,
|
||||
|
||||
user VARCHAR(255) NOT NULL,
|
||||
project VARCHAR(255) NOT NULL,
|
||||
partition VARCHAR(255) NOT NULL,
|
||||
array_job_id BIGINT NOT NULL,
|
||||
duration INT,
|
||||
job_state VARCHAR(255) CHECK(job_state IN ('running', 'completed', 'failed', 'canceled', 'stopped', 'timeout')) NOT NULL,
|
||||
meta_data TEXT, -- json, but sqlite has no json type
|
||||
resources TEXT NOT NULL, -- json, but sqlite has no json type
|
||||
|
||||
num_nodes INT NOT NULL,
|
||||
num_hwthreads INT NOT NULL,
|
||||
num_acc INT NOT NULL,
|
||||
smt TINYINT CHECK(smt IN (0, 1 )) NOT NULL DEFAULT 1,
|
||||
exclusive TINYINT CHECK(exclusive IN (0, 1, 2)) NOT NULL DEFAULT 1,
|
||||
monitoring_status TINYINT CHECK(monitoring_status IN (0, 1 )) NOT NULL DEFAULT 1,
|
||||
|
||||
mem_used_max REAL NOT NULL DEFAULT 0.0,
|
||||
flops_any_avg REAL NOT NULL DEFAULT 0.0,
|
||||
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
load_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
|
||||
file_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
file_data_vol_total REAL NOT NULL DEFAULT 0.0);
|
||||
|
||||
CREATE TABLE tag (
|
||||
id INTEGER PRIMARY KEY,
|
||||
tag_type VARCHAR(255) NOT NULL,
|
||||
tag_name VARCHAR(255) NOT NULL);
|
||||
|
||||
CREATE TABLE jobtag (
|
||||
job_id INTEGER,
|
||||
tag_id INTEGER,
|
||||
PRIMARY KEY (job_id, tag_id),
|
||||
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
|
||||
`
|
||||
|
||||
// Delete the tables "job", "tag" and "jobtag" from the database and
|
||||
// repopulate them using the jobs found in `archive`.
|
||||
func initDB(db *sqlx.DB, archive string) error {
|
||||
@ -21,99 +69,101 @@ func initDB(db *sqlx.DB, archive string) error {
|
||||
fmt.Println("Building database...")
|
||||
|
||||
// Basic database structure:
|
||||
_, err := db.Exec(`
|
||||
DROP TABLE IF EXISTS job;
|
||||
DROP TABLE IF EXISTS tag;
|
||||
DROP TABLE IF EXISTS jobtag;
|
||||
|
||||
CREATE TABLE job (
|
||||
id INTEGER PRIMARY KEY,
|
||||
job_id TEXT,
|
||||
user_id TEXT,
|
||||
project_id TEXT,
|
||||
cluster_id TEXT,
|
||||
start_time TIMESTAMP,
|
||||
duration INTEGER,
|
||||
job_state TEXT,
|
||||
num_nodes INTEGER,
|
||||
node_list TEXT,
|
||||
metadata TEXT,
|
||||
|
||||
flops_any_avg REAL,
|
||||
mem_bw_avg REAL,
|
||||
net_bw_avg REAL,
|
||||
file_bw_avg REAL,
|
||||
load_avg REAL);
|
||||
CREATE TABLE tag (
|
||||
id INTEGER PRIMARY KEY,
|
||||
tag_type TEXT,
|
||||
tag_name TEXT);
|
||||
CREATE TABLE jobtag (
|
||||
job_id INTEGER,
|
||||
tag_id INTEGER,
|
||||
PRIMARY KEY (job_id, tag_id),
|
||||
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE ON UPDATE NO ACTION,
|
||||
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE ON UPDATE NO ACTION);`)
|
||||
_, err := db.Exec(JOBS_DB_SCHEMA)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entries0, err := os.ReadDir(archive)
|
||||
clustersDir, err := os.ReadDir(archive)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
insertstmt, err := db.Prepare(`INSERT INTO job
|
||||
(job_id, user_id, project_id, cluster_id, start_time, duration, job_state, num_nodes, node_list, metadata, flops_any_avg, mem_bw_avg, net_bw_avg, file_bw_avg, load_avg)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var tx *sql.Tx = nil
|
||||
var i int = 0
|
||||
tx, err := db.Beginx()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stmt, err := tx.PrepareNamed(`INSERT INTO job (
|
||||
job_id, user, project, cluster, partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, resources, meta_data,
|
||||
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
|
||||
) VALUES (
|
||||
:job_id, :user, :project, :cluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :resources, :meta_data,
|
||||
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
|
||||
);`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
i := 0
|
||||
tags := make(map[string]int64)
|
||||
for _, entry0 := range entries0 {
|
||||
entries1, err := os.ReadDir(filepath.Join(archive, entry0.Name()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, entry1 := range entries1 {
|
||||
if !entry1.IsDir() {
|
||||
continue
|
||||
handleDirectory := func(filename string) error {
|
||||
// Bundle 100 inserts into one transaction for better performance:
|
||||
if i%100 == 0 {
|
||||
if tx != nil {
|
||||
if err := tx.Commit(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
entries2, err := os.ReadDir(filepath.Join(archive, entry0.Name(), entry1.Name()))
|
||||
tx, err = db.Beginx()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, entry2 := range entries2 {
|
||||
// Bundle 200 inserts into one transaction for better performance:
|
||||
if i%200 == 0 {
|
||||
if tx != nil {
|
||||
if err := tx.Commit(); err != nil {
|
||||
return err
|
||||
stmt = tx.NamedStmt(stmt)
|
||||
fmt.Printf("%d jobs inserted...\r", i)
|
||||
}
|
||||
|
||||
err := loadJob(tx, stmt, tags, filename)
|
||||
if err == nil {
|
||||
i += 1
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
for _, clusterDir := range clustersDir {
|
||||
lvl1Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, lvl1Dir := range lvl1Dirs {
|
||||
if !lvl1Dir.IsDir() {
|
||||
// Could be the cluster.json file
|
||||
continue
|
||||
}
|
||||
|
||||
lvl2Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, lvl2Dir := range lvl2Dirs {
|
||||
dirpath := filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
|
||||
startTimeDirs, err := os.ReadDir(dirpath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, startTimeDir := range startTimeDirs {
|
||||
if startTimeDir.Type().IsRegular() && startTimeDir.Name() == "meta.json" {
|
||||
if err := handleDirectory(dirpath); err != nil {
|
||||
log.Printf("in %s: %s\n", dirpath, err.Error())
|
||||
}
|
||||
} else if startTimeDir.IsDir() {
|
||||
if err := handleDirectory(filepath.Join(dirpath, startTimeDir.Name())); err != nil {
|
||||
log.Printf("in %s: %s\n", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
tx, err = db.Begin()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
insertstmt = tx.Stmt(insertstmt)
|
||||
fmt.Printf("%d jobs inserted...\r", i)
|
||||
}
|
||||
|
||||
filename := filepath.Join(archive, entry0.Name(), entry1.Name(), entry2.Name())
|
||||
if err = loadJob(tx, insertstmt, tags, filename); err != nil {
|
||||
fmt.Printf("failed to load '%s': %s", filename, err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
i += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -125,37 +175,44 @@ func initDB(db *sqlx.DB, archive string) error {
|
||||
// Create indexes after inserts so that they do not
|
||||
// need to be continually updated.
|
||||
if _, err := db.Exec(`
|
||||
CREATE INDEX job_by_user ON job (user_id);
|
||||
CREATE INDEX job_by_user ON job (user);
|
||||
CREATE INDEX job_by_starttime ON job (start_time);`); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
|
||||
log.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read the `meta.json` file at `path` and insert it to the database using the prepared
|
||||
// insert statement `stmt`. `tags` maps all existing tags to their database ID.
|
||||
func loadJob(tx *sql.Tx, stmt *sql.Stmt, tags map[string]int64, path string) error {
|
||||
func loadJob(tx *sqlx.Tx, stmt *sqlx.NamedStmt, tags map[string]int64, path string) error {
|
||||
f, err := os.Open(filepath.Join(path, "meta.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var job schema.JobMeta
|
||||
if err := json.NewDecoder(bufio.NewReader(f)).Decode(&job); err != nil {
|
||||
var jobMeta schema.JobMeta = schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err := json.NewDecoder(bufio.NewReader(f)).Decode(&jobMeta); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
flopsAnyAvg := loadJobStat(&job, "flops_any")
|
||||
memBwAvg := loadJobStat(&job, "mem_bw")
|
||||
netBwAvg := loadJobStat(&job, "net_bw")
|
||||
fileBwAvg := loadJobStat(&job, "file_bw")
|
||||
loadAvg := loadJobStat(&job, "load_one")
|
||||
job := schema.Job{
|
||||
BaseJob: jobMeta.BaseJob,
|
||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
||||
}
|
||||
|
||||
res, err := stmt.Exec(job.JobId, job.UserId, job.ProjectId, job.ClusterId, job.StartTime, job.Duration, job.JobState,
|
||||
job.NumNodes, strings.Join(job.Nodes, ","), nil, flopsAnyAvg, memBwAvg, netBwAvg, fileBwAvg, loadAvg)
|
||||
// TODO: Other metrics...
|
||||
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
|
||||
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
res, err := stmt.Exec(job)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -188,12 +245,10 @@ func loadJob(tx *sql.Tx, stmt *sql.Stmt, tags map[string]int64, path string) err
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadJobStat(job *schema.JobMeta, metric string) sql.NullFloat64 {
|
||||
val := sql.NullFloat64{Valid: false}
|
||||
func loadJobStat(job *schema.JobMeta, metric string) float64 {
|
||||
if stats, ok := job.Statistics[metric]; ok {
|
||||
val.Valid = true
|
||||
val.Float64 = stats.Avg
|
||||
return stats.Avg
|
||||
}
|
||||
|
||||
return val
|
||||
return 0.0
|
||||
}
|
||||
|
@ -11,35 +11,30 @@ import (
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
)
|
||||
|
||||
var JobArchivePath string = "./var/job-archive"
|
||||
|
||||
// For a given job, return the path of the `data.json`/`meta.json` file.
|
||||
// TODO: Implement Issue ClusterCockpit/ClusterCockpit#97
|
||||
func getPath(job *model.Job, file string) (string, error) {
|
||||
id, err := strconv.Atoi(strings.Split(job.JobID, ".")[0])
|
||||
if err != nil {
|
||||
return "", err
|
||||
func getPath(job *schema.Job, file string, checkLegacy bool) (string, error) {
|
||||
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
|
||||
if !checkLegacy {
|
||||
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||
}
|
||||
|
||||
lvl1, lvl2 := fmt.Sprintf("%d", id/1000), fmt.Sprintf("%03d", id%1000)
|
||||
legacyPath := filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, file)
|
||||
legacyPath := filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, file)
|
||||
if _, err := os.Stat(legacyPath); errors.Is(err, os.ErrNotExist) {
|
||||
return filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||
}
|
||||
|
||||
return legacyPath, nil
|
||||
}
|
||||
|
||||
// Assuming job is completed/archived, return the jobs metric data.
|
||||
func loadFromArchive(job *model.Job) (schema.JobData, error) {
|
||||
filename, err := getPath(job, "data.json")
|
||||
func loadFromArchive(job *schema.Job) (schema.JobData, error) {
|
||||
filename, err := getPath(job, "data.json", true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -60,12 +55,12 @@ func loadFromArchive(job *model.Job) (schema.JobData, error) {
|
||||
|
||||
// If the job is archived, find its `meta.json` file and override the tags list
|
||||
// in that JSON file. If the job is not archived, nothing is done.
|
||||
func UpdateTags(job *model.Job, tags []*model.JobTag) error {
|
||||
if job.State == model.JobStateRunning {
|
||||
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
||||
if job.State == schema.JobStateRunning {
|
||||
return nil
|
||||
}
|
||||
|
||||
filename, err := getPath(job, "meta.json")
|
||||
filename, err := getPath(job, "meta.json", true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -78,23 +73,19 @@ func UpdateTags(job *model.Job, tags []*model.JobTag) error {
|
||||
return err
|
||||
}
|
||||
|
||||
var metaFile schema.JobMeta
|
||||
var metaFile schema.JobMeta = schema.JobMeta{
|
||||
BaseJob: schema.JobDefaults,
|
||||
}
|
||||
if err := json.NewDecoder(f).Decode(&metaFile); err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
|
||||
metaFile.Tags = make([]struct {
|
||||
Name string "json:\"name\""
|
||||
Type string "json:\"type\""
|
||||
}, 0)
|
||||
metaFile.Tags = make([]*schema.Tag, 0)
|
||||
for _, tag := range tags {
|
||||
metaFile.Tags = append(metaFile.Tags, struct {
|
||||
Name string "json:\"name\""
|
||||
Type string "json:\"type\""
|
||||
}{
|
||||
Name: tag.TagName,
|
||||
Type: tag.TagType,
|
||||
metaFile.Tags = append(metaFile.Tags, &schema.Tag{
|
||||
Name: tag.Name,
|
||||
Type: tag.Type,
|
||||
})
|
||||
}
|
||||
|
||||
@ -107,8 +98,8 @@ func UpdateTags(job *model.Job, tags []*model.JobTag) error {
|
||||
}
|
||||
|
||||
// Helper to metricdata.LoadAverages().
|
||||
func loadAveragesFromArchive(job *model.Job, metrics []string, data [][]schema.Float) error {
|
||||
filename, err := getPath(job, "meta.json")
|
||||
func loadAveragesFromArchive(job *schema.Job, metrics []string, data [][]schema.Float) error {
|
||||
filename, err := getPath(job, "meta.json", true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -135,97 +126,144 @@ func loadAveragesFromArchive(job *model.Job, metrics []string, data [][]schema.F
|
||||
}
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
func ArchiveJob(job *model.Job, ctx context.Context) error {
|
||||
if job.State != model.JobStateRunning {
|
||||
return errors.New("cannot archive job that is not running")
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
if job.State != schema.JobStateRunning {
|
||||
return nil, errors.New("cannot archive job that is not running")
|
||||
}
|
||||
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := config.GetClusterConfig(job.ClusterID).MetricConfig
|
||||
metricConfigs := config.GetClusterConfig(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
jobData, err := LoadData(job, allMetrics, ctx)
|
||||
|
||||
// TODO: Use more granular resolution on non-exclusive jobs?
|
||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||
jobData, err := LoadData(job, allMetrics, scopes, ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags := []struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
}{}
|
||||
for _, tag := range job.Tags {
|
||||
tags = append(tags, struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
}{
|
||||
Name: tag.TagName,
|
||||
Type: tag.TagType,
|
||||
})
|
||||
if err := calcStatisticsSeries(job, jobData); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
metaData := &schema.JobMeta{
|
||||
JobId: job.JobID,
|
||||
UserId: job.UserID,
|
||||
ClusterId: job.ClusterID,
|
||||
NumNodes: job.NumNodes,
|
||||
JobState: job.State.String(),
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Duration: int64(job.Duration),
|
||||
Nodes: job.Nodes,
|
||||
Tags: tags,
|
||||
Statistics: make(map[string]*schema.JobMetaStatistics),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
for _, nodedata := range data.Series {
|
||||
avg += nodedata.Statistics.Avg
|
||||
min = math.Min(min, nodedata.Statistics.Min)
|
||||
max = math.Max(max, nodedata.Statistics.Max)
|
||||
nodeData, ok := data["node"]
|
||||
if !ok {
|
||||
// TODO/FIXME: Calc average for non-node metrics as well!
|
||||
continue
|
||||
}
|
||||
|
||||
metaData.Statistics[metric] = &schema.JobMetaStatistics{
|
||||
Unit: config.GetMetricConfig(job.ClusterID, metric).Unit,
|
||||
for _, series := range nodeData.Series {
|
||||
avg += series.Statistics.Avg
|
||||
min = math.Min(min, series.Statistics.Min)
|
||||
max = math.Max(max, series.Statistics.Max)
|
||||
}
|
||||
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: config.GetMetricConfig(job.Cluster, metric).Unit,
|
||||
Avg: avg / float64(job.NumNodes),
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
|
||||
dirPath, err := getPath(job, "")
|
||||
// If the file based archive is disabled,
|
||||
// only return the JobMeta structure as the
|
||||
// statistics in there are needed.
|
||||
if !useArchive {
|
||||
return jobMeta, nil
|
||||
}
|
||||
|
||||
dirPath, err := getPath(job, "", false)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(dirPath, 0777); err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f, err := os.Create(path.Join(dirPath, "meta.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
writer := bufio.NewWriter(f)
|
||||
if err := json.NewEncoder(writer).Encode(metaData); err != nil {
|
||||
return err
|
||||
if err := json.NewEncoder(writer).Encode(jobMeta); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := writer.Flush(); err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f, err = os.Create(path.Join(dirPath, "data.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
writer = bufio.NewWriter(f)
|
||||
if err := json.NewEncoder(writer).Encode(metaData); err != nil {
|
||||
return err
|
||||
if err := json.NewEncoder(writer).Encode(jobData); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := writer.Flush(); err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return f.Close()
|
||||
return jobMeta, f.Close()
|
||||
}
|
||||
|
||||
// Add statisticsSeries fields
|
||||
func calcStatisticsSeries(job *schema.Job, jobData schema.JobData) error {
|
||||
for _, scopes := range jobData {
|
||||
for _, jobMetric := range scopes {
|
||||
if jobMetric.StatisticsSeries != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if len(jobMetric.Series) < 5 {
|
||||
continue
|
||||
}
|
||||
|
||||
n := 0
|
||||
for _, series := range jobMetric.Series {
|
||||
if len(series.Data) > n {
|
||||
n = len(series.Data)
|
||||
}
|
||||
}
|
||||
|
||||
mean, min, max := make([]schema.Float, n), make([]schema.Float, n), make([]schema.Float, n)
|
||||
for i := 0; i < n; i++ {
|
||||
sum, smin, smax := schema.Float(0.), math.MaxFloat32, -math.MaxFloat32
|
||||
for _, series := range jobMetric.Series {
|
||||
if i >= len(series.Data) {
|
||||
sum, smin, smax = schema.NaN, math.NaN(), math.NaN()
|
||||
break
|
||||
}
|
||||
x := series.Data[i]
|
||||
sum += x
|
||||
smin = math.Min(smin, float64(x))
|
||||
smax = math.Max(smax, float64(x))
|
||||
}
|
||||
sum /= schema.Float(len(jobMetric.Series))
|
||||
mean[i] = sum
|
||||
min[i] = schema.Float(smin)
|
||||
max[i] = schema.Float(smax)
|
||||
}
|
||||
|
||||
jobMetric.StatisticsSeries = &schema.StatsSeries{
|
||||
Min: min, Mean: mean, Max: max,
|
||||
}
|
||||
jobMetric.Series = nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -1,17 +1,18 @@
|
||||
package metricdata
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
)
|
||||
|
||||
@ -31,9 +32,9 @@ type ApiMetricData struct {
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
Data []schema.Float `json:"data"`
|
||||
Avg *float64 `json:"avg"`
|
||||
Min *float64 `json:"min"`
|
||||
Max *float64 `json:"max"`
|
||||
Avg schema.Float `json:"avg"`
|
||||
Min schema.Float `json:"min"`
|
||||
Max schema.Float `json:"max"`
|
||||
}
|
||||
|
||||
type ApiStatsData struct {
|
||||
@ -46,22 +47,23 @@ type ApiStatsData struct {
|
||||
Max schema.Float `json:"max"`
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) Init() error {
|
||||
ccms.url = os.Getenv("CCMETRICSTORE_URL")
|
||||
ccms.jwt = os.Getenv("CCMETRICSTORE_JWT")
|
||||
if ccms.url == "" || ccms.jwt == "" {
|
||||
return errors.New("environment variables 'CCMETRICSTORE_URL' or 'CCMETRICSTORE_JWT' not set")
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) Init(url, token string) error {
|
||||
ccms.url = url
|
||||
ccms.jwt = token
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
|
||||
func (ccms *CCMetricStore) doRequest(job *schema.Job, suffix string, metrics []string, ctx context.Context) (*http.Response, error) {
|
||||
from, to := job.StartTime.Unix(), job.StartTime.Add(time.Duration(job.Duration)*time.Second).Unix()
|
||||
reqBody := ApiRequestBody{}
|
||||
reqBody.Metrics = metrics
|
||||
for _, node := range job.Nodes {
|
||||
reqBody.Selectors = append(reqBody.Selectors, []string{job.ClusterID, node})
|
||||
for _, node := range job.Resources {
|
||||
if node.Accelerators != nil || node.HWThreads != nil {
|
||||
// TODO/FIXME:
|
||||
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
|
||||
}
|
||||
|
||||
reqBody.Selectors = append(reqBody.Selectors, []string{job.Cluster, node.Hostname})
|
||||
}
|
||||
|
||||
reqBodyBytes, err := json.Marshal(reqBody)
|
||||
@ -69,53 +71,324 @@ func (ccms *CCMetricStore) LoadData(job *model.Job, metrics []string, ctx contex
|
||||
return nil, err
|
||||
}
|
||||
|
||||
authHeader := fmt.Sprintf("Bearer %s", ccms.jwt)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/api/%d/%d/timeseries?with-stats=true", ccms.url, from, to), bytes.NewReader(reqBodyBytes))
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/api/%d/%d/%s", ccms.url, from, to, suffix), bytes.NewReader(reqBodyBytes))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Add("Authorization", authHeader)
|
||||
if ccms.jwt != "" {
|
||||
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
||||
}
|
||||
return ccms.client.Do(req)
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||
// log.Printf("job: %#v", job)
|
||||
|
||||
type ApiQuery struct {
|
||||
Metric string `json:"metric"`
|
||||
Hostname string `json:"hostname"`
|
||||
Type *string `json:"type,omitempty"`
|
||||
TypeIds []string `json:"type-ids,omitempty"`
|
||||
SubType *string `json:"subtype,omitempty"`
|
||||
SubTypeIds []string `json:"subtype-ids,omitempty"`
|
||||
}
|
||||
|
||||
type ApiQueryRequest struct {
|
||||
Cluster string `json:"cluster"`
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
Queries []ApiQuery `json:"queries"`
|
||||
}
|
||||
|
||||
type ApiQueryResponse struct {
|
||||
ApiMetricData
|
||||
Query *ApiQuery `json:"query"`
|
||||
}
|
||||
|
||||
reqBody := ApiQueryRequest{
|
||||
Cluster: job.Cluster,
|
||||
From: job.StartTime.Unix(),
|
||||
To: job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
|
||||
Queries: make([]ApiQuery, 0),
|
||||
}
|
||||
|
||||
if len(scopes) != 1 {
|
||||
return nil, errors.New("todo: support more than one scope in a query")
|
||||
}
|
||||
|
||||
topology := config.GetPartition(job.Cluster, job.Partition).Topology
|
||||
scopeForMetric := map[string]schema.MetricScope{}
|
||||
for _, metric := range metrics {
|
||||
mc := config.GetMetricConfig(job.Cluster, metric)
|
||||
if mc == nil {
|
||||
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||
log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||
continue
|
||||
}
|
||||
|
||||
nativeScope, requestedScope := mc.Scope, scopes[0]
|
||||
|
||||
// case 1: A metric is requested at node scope with a native scope of node as well
|
||||
// case 2: A metric is requested at node scope and node is exclusive
|
||||
// case 3: A metric has native scope node
|
||||
if (nativeScope == requestedScope && nativeScope == schema.MetricScopeNode) ||
|
||||
(job.Exclusive == 1 && requestedScope == schema.MetricScopeNode) ||
|
||||
(nativeScope == schema.MetricScopeNode) {
|
||||
nodes := map[string]bool{}
|
||||
for _, resource := range job.Resources {
|
||||
nodes[resource.Hostname] = true
|
||||
}
|
||||
|
||||
for node := range nodes {
|
||||
reqBody.Queries = append(reqBody.Queries, ApiQuery{
|
||||
Metric: metric,
|
||||
Hostname: node,
|
||||
})
|
||||
}
|
||||
|
||||
scopeForMetric[metric] = schema.MetricScopeNode
|
||||
continue
|
||||
}
|
||||
|
||||
// case: Read a metric at hwthread scope with native scope hwthread
|
||||
if nativeScope == requestedScope && nativeScope == schema.MetricScopeHWThread && job.NumNodes == 1 {
|
||||
hwthreads := job.Resources[0].HWThreads
|
||||
if hwthreads == nil {
|
||||
hwthreads = topology.Node
|
||||
}
|
||||
|
||||
t := "cpu" // TODO/FIXME: inconsistency between cc-metric-collector and ClusterCockpit
|
||||
for _, hwthread := range hwthreads {
|
||||
reqBody.Queries = append(reqBody.Queries, ApiQuery{
|
||||
Metric: metric,
|
||||
Hostname: job.Resources[0].Hostname,
|
||||
Type: &t,
|
||||
TypeIds: []string{strconv.Itoa(hwthread)},
|
||||
})
|
||||
}
|
||||
|
||||
scopeForMetric[metric] = schema.MetricScopeHWThread
|
||||
continue
|
||||
}
|
||||
|
||||
// case: A metric is requested at node scope, has a hwthread scope and node is not exclusive and runs on a single node
|
||||
if requestedScope == schema.MetricScopeNode && nativeScope == schema.MetricScopeHWThread && job.Exclusive != 1 && job.NumNodes == 1 {
|
||||
hwthreads := job.Resources[0].HWThreads
|
||||
if hwthreads == nil {
|
||||
hwthreads = topology.Node
|
||||
}
|
||||
|
||||
t := "cpu" // TODO/FIXME: inconsistency between cc-metric-collector and ClusterCockpit
|
||||
ids := make([]string, 0, len(hwthreads))
|
||||
for _, hwthread := range hwthreads {
|
||||
ids = append(ids, strconv.Itoa(hwthread))
|
||||
}
|
||||
|
||||
reqBody.Queries = append(reqBody.Queries, ApiQuery{
|
||||
Metric: metric,
|
||||
Hostname: job.Resources[0].Hostname,
|
||||
Type: &t,
|
||||
TypeIds: ids,
|
||||
})
|
||||
scopeForMetric[metric] = schema.MetricScopeNode
|
||||
continue
|
||||
}
|
||||
|
||||
// TODO: Job teilt sich knoten und metric native scope ist kleiner als node
|
||||
panic("todo")
|
||||
}
|
||||
|
||||
// log.Printf("query: %#v", reqBody)
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
if err := json.NewEncoder(buf).Encode(reqBody); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.url+"/api/query", buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ccms.jwt != "" {
|
||||
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
||||
}
|
||||
res, err := ccms.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("cc-metric-store replied with: %s", res.Status)
|
||||
}
|
||||
|
||||
resdata := make([]map[string]ApiMetricData, 0, len(reqBody.Selectors))
|
||||
var resBody []ApiQueryResponse
|
||||
if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// log.Printf("response: %#v", resBody)
|
||||
|
||||
var jobData schema.JobData = make(schema.JobData)
|
||||
for _, res := range resBody {
|
||||
|
||||
metric := res.Query.Metric
|
||||
if _, ok := jobData[metric]; !ok {
|
||||
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
|
||||
}
|
||||
|
||||
if res.Error != nil {
|
||||
return nil, fmt.Errorf("cc-metric-store error while fetching %s: %s", metric, *res.Error)
|
||||
}
|
||||
|
||||
mc := config.GetMetricConfig(job.Cluster, metric)
|
||||
scope := scopeForMetric[metric]
|
||||
jobMetric, ok := jobData[metric][scope]
|
||||
if !ok {
|
||||
jobMetric = &schema.JobMetric{
|
||||
Unit: mc.Unit,
|
||||
Scope: scope,
|
||||
Timestep: mc.Timestep,
|
||||
Series: make([]schema.Series, 0),
|
||||
}
|
||||
jobData[metric][scope] = jobMetric
|
||||
}
|
||||
|
||||
id := (*int)(nil)
|
||||
if res.Query.Type != nil {
|
||||
id = new(int)
|
||||
*id, _ = strconv.Atoi(res.Query.TypeIds[0])
|
||||
}
|
||||
|
||||
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
||||
// TODO: use schema.Float instead of float64?
|
||||
// This is done because regular float64 can not be JSONed when NaN.
|
||||
res.Avg = schema.Float(0)
|
||||
res.Min = schema.Float(0)
|
||||
res.Max = schema.Float(0)
|
||||
}
|
||||
|
||||
jobMetric.Series = append(jobMetric.Series, schema.Series{
|
||||
Hostname: res.Query.Hostname,
|
||||
Id: id,
|
||||
Statistics: &schema.MetricStatistics{
|
||||
Avg: float64(res.Avg),
|
||||
Min: float64(res.Min),
|
||||
Max: float64(res.Max),
|
||||
},
|
||||
Data: res.Data,
|
||||
})
|
||||
}
|
||||
|
||||
return jobData, nil
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
res, err := ccms.doRequest(job, "stats", metrics, ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resdata := make([]map[string]ApiStatsData, 0, len(job.Resources))
|
||||
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var jobData schema.JobData = make(schema.JobData)
|
||||
stats := map[string]map[string]schema.MetricStatistics{}
|
||||
for _, metric := range metrics {
|
||||
mc := config.GetMetricConfig(job.ClusterID, metric)
|
||||
metricData := &schema.JobMetric{
|
||||
Scope: "node", // TODO: FIXME: Whatever...
|
||||
Unit: mc.Unit,
|
||||
Timestep: mc.Sampletime,
|
||||
Series: make([]*schema.MetricSeries, 0, len(job.Nodes)),
|
||||
}
|
||||
for i, node := range job.Nodes {
|
||||
nodestats := map[string]schema.MetricStatistics{}
|
||||
for i, node := range job.Resources {
|
||||
if node.Accelerators != nil || node.HWThreads != nil {
|
||||
// TODO/FIXME:
|
||||
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
|
||||
}
|
||||
|
||||
data := resdata[i][metric]
|
||||
if data.Error != nil {
|
||||
return nil, errors.New(*data.Error)
|
||||
}
|
||||
|
||||
if data.Avg == nil || data.Min == nil || data.Max == nil {
|
||||
return nil, errors.New("no data")
|
||||
if data.Samples == 0 {
|
||||
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node.Hostname, metric)
|
||||
}
|
||||
|
||||
metricData.Series = append(metricData.Series, &schema.MetricSeries{
|
||||
NodeID: node,
|
||||
Data: data.Data,
|
||||
Statistics: &schema.MetricStatistics{
|
||||
Avg: *data.Avg,
|
||||
Min: *data.Min,
|
||||
Max: *data.Max,
|
||||
},
|
||||
})
|
||||
nodestats[node.Hostname] = schema.MetricStatistics{
|
||||
Avg: float64(data.Avg),
|
||||
Min: float64(data.Min),
|
||||
Max: float64(data.Max),
|
||||
}
|
||||
}
|
||||
jobData[metric] = metricData
|
||||
|
||||
stats[metric] = nodestats
|
||||
}
|
||||
|
||||
return jobData, nil
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error) {
|
||||
reqBody := ApiRequestBody{}
|
||||
reqBody.Metrics = metrics
|
||||
for _, node := range nodes {
|
||||
reqBody.Selectors = append(reqBody.Selectors, []string{clusterId, node})
|
||||
}
|
||||
|
||||
reqBodyBytes, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var req *http.Request
|
||||
if nodes == nil {
|
||||
req, err = http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/api/%s/%d/%d/all-nodes", ccms.url, clusterId, from, to), bytes.NewReader(reqBodyBytes))
|
||||
} else {
|
||||
req, err = http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/api/%d/%d/timeseries", ccms.url, from, to), bytes.NewReader(reqBodyBytes))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ccms.jwt != "" {
|
||||
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
||||
}
|
||||
res, err := ccms.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data := map[string]map[string][]schema.Float{}
|
||||
if nodes == nil {
|
||||
resdata := map[string]map[string]ApiMetricData{}
|
||||
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for node, metrics := range resdata {
|
||||
nodedata := map[string][]schema.Float{}
|
||||
for metric, data := range metrics {
|
||||
if data.Error != nil {
|
||||
return nil, errors.New(*data.Error)
|
||||
}
|
||||
|
||||
nodedata[metric] = data.Data
|
||||
}
|
||||
data[node] = nodedata
|
||||
}
|
||||
} else {
|
||||
resdata := make([]map[string]ApiMetricData, 0, len(nodes))
|
||||
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for i, node := range nodes {
|
||||
metricsData := map[string][]schema.Float{}
|
||||
for metric, data := range resdata[i] {
|
||||
if data.Error != nil {
|
||||
return nil, errors.New(*data.Error)
|
||||
}
|
||||
|
||||
metricsData[metric] = data.Data
|
||||
}
|
||||
|
||||
data[node] = metricsData
|
||||
}
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
179
metricdata/influxdb-v2.go
Normal file
179
metricdata/influxdb-v2.go
Normal file
@ -0,0 +1,179 @@
|
||||
package metricdata
|
||||
|
||||
/*
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||
)
|
||||
|
||||
type InfluxDBv2DataRepository struct {
|
||||
client influxdb2.Client
|
||||
queryClient influxdb2Api.QueryAPI
|
||||
bucket, measurement string
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) Init(url string) error {
|
||||
token := os.Getenv("INFLUXDB_V2_TOKEN")
|
||||
if token == "" {
|
||||
log.Println("warning: environment variable 'INFLUXDB_V2_TOKEN' not set")
|
||||
}
|
||||
|
||||
idb.client = influxdb2.NewClient(url, token)
|
||||
idb.queryClient = idb.client.QueryAPI("ClusterCockpit")
|
||||
idb.bucket = "ClusterCockpit/data"
|
||||
idb.measurement = "data"
|
||||
return nil
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) formatTime(t time.Time) string {
|
||||
return fmt.Sprintf("%d-%02d-%02dT%02d:%02d:%02dZ",
|
||||
t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second())
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
|
||||
fieldsConds := make([]string, 0, len(metrics))
|
||||
for _, m := range metrics {
|
||||
fieldsConds = append(fieldsConds, fmt.Sprintf(`r._field == "%s"`, m))
|
||||
}
|
||||
fieldsCond := strings.Join(fieldsConds, " or ")
|
||||
|
||||
hostsConds := make([]string, 0, len(job.Resources))
|
||||
for _, h := range job.Resources {
|
||||
if h.HWThreads != nil || h.Accelerators != nil {
|
||||
// TODO/FIXME...
|
||||
return nil, errors.New("the InfluxDB metric data repository does not support HWThreads or Accelerators")
|
||||
}
|
||||
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h.Hostname))
|
||||
}
|
||||
hostsCond := strings.Join(hostsConds, " or ")
|
||||
|
||||
query := fmt.Sprintf(`from(bucket: "%s")
|
||||
|> range(start: %s, stop: %s)
|
||||
|> filter(fn: (r) => r._measurement == "%s" and (%s) and (%s))
|
||||
|> drop(columns: ["_start", "_stop", "_measurement"])`, idb.bucket,
|
||||
idb.formatTime(job.StartTime), idb.formatTime(job.StartTime.Add(time.Duration(job.Duration)).Add(1*time.Second)),
|
||||
idb.measurement, hostsCond, fieldsCond)
|
||||
rows, err := idb.queryClient.Query(ctx, query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobData := make(schema.JobData)
|
||||
|
||||
var currentSeries *schema.MetricSeries = nil
|
||||
for rows.Next() {
|
||||
row := rows.Record()
|
||||
if currentSeries == nil || rows.TableChanged() {
|
||||
field, host := row.Field(), row.ValueByKey("host").(string)
|
||||
jobMetric, ok := jobData[field]
|
||||
if !ok {
|
||||
mc := config.GetMetricConfig(job.Cluster, field)
|
||||
jobMetric = &schema.JobMetric{
|
||||
Scope: "node", // TODO: FIXME: Whatever...
|
||||
Unit: mc.Unit,
|
||||
Timestep: mc.Timestep,
|
||||
Series: make([]*schema.MetricSeries, 0, len(job.Resources)),
|
||||
}
|
||||
jobData[field] = jobMetric
|
||||
}
|
||||
|
||||
currentSeries = &schema.MetricSeries{
|
||||
Hostname: host,
|
||||
Statistics: nil,
|
||||
Data: make([]schema.Float, 0),
|
||||
}
|
||||
jobMetric.Series = append(jobMetric.Series, currentSeries)
|
||||
}
|
||||
|
||||
val := row.Value().(float64)
|
||||
currentSeries.Data = append(currentSeries.Data, schema.Float(val))
|
||||
}
|
||||
|
||||
stats, err := idb.LoadStats(job, metrics, ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for metric, nodes := range stats {
|
||||
jobMetric := jobData[metric]
|
||||
for node, stats := range nodes {
|
||||
for _, series := range jobMetric.Series {
|
||||
if series.Hostname == node {
|
||||
series.Statistics = &stats
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return jobData, nil
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) LoadStats(job *model.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
stats := map[string]map[string]schema.MetricStatistics{}
|
||||
|
||||
hostsConds := make([]string, 0, len(job.Resources))
|
||||
for _, h := range job.Resources {
|
||||
if h.HWThreads != nil || h.Accelerators != nil {
|
||||
// TODO/FIXME...
|
||||
return nil, errors.New("the InfluxDB metric data repository does not support HWThreads or Accelerators")
|
||||
}
|
||||
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h.Hostname))
|
||||
}
|
||||
hostsCond := strings.Join(hostsConds, " or ")
|
||||
|
||||
for _, metric := range metrics {
|
||||
query := fmt.Sprintf(`
|
||||
data = from(bucket: "%s")
|
||||
|> range(start: %s, stop: %s)
|
||||
|> filter(fn: (r) => r._measurement == "%s" and r._field == "%s" and (%s))
|
||||
|
||||
union(tables: [
|
||||
data |> mean(column: "_value") |> set(key: "_field", value: "avg")
|
||||
data |> min(column: "_value") |> set(key: "_field", value: "min")
|
||||
data |> max(column: "_value") |> set(key: "_field", value: "max")
|
||||
])
|
||||
|> pivot(rowKey: ["host"], columnKey: ["_field"], valueColumn: "_value")
|
||||
|> group()`, idb.bucket,
|
||||
idb.formatTime(job.StartTime), idb.formatTime(job.StartTime.Add(time.Duration(job.Duration)).Add(1*time.Second)),
|
||||
idb.measurement, metric, hostsCond)
|
||||
rows, err := idb.queryClient.Query(ctx, query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
nodes := map[string]schema.MetricStatistics{}
|
||||
for rows.Next() {
|
||||
row := rows.Record()
|
||||
host := row.ValueByKey("host").(string)
|
||||
avg, min, max := row.ValueByKey("avg").(float64),
|
||||
row.ValueByKey("min").(float64),
|
||||
row.ValueByKey("max").(float64)
|
||||
|
||||
nodes[host] = schema.MetricStatistics{
|
||||
Avg: avg,
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
stats[metric] = nodes
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error) {
|
||||
return nil, nil
|
||||
}
|
||||
*/
|
@ -2,31 +2,74 @@ package metricdata
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
)
|
||||
|
||||
var runningJobs *CCMetricStore
|
||||
type MetricDataRepository interface {
|
||||
// Initialize this MetricDataRepository. One instance of
|
||||
// this interface will only ever be responsible for one cluster.
|
||||
Init(url, token string) error
|
||||
|
||||
func init() {
|
||||
runningJobs = &CCMetricStore{}
|
||||
if err := runningJobs.Init(); err != nil {
|
||||
log.Fatalln(err)
|
||||
// Return the JobData for the given job, only with the requested metrics.
|
||||
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
|
||||
|
||||
// Return a map of metrics to a map of nodes to the metric statistics of the job.
|
||||
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
|
||||
|
||||
// Return a map of nodes to a map of metrics to the data for the requested time.
|
||||
LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error)
|
||||
}
|
||||
|
||||
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
|
||||
|
||||
var JobArchivePath string
|
||||
|
||||
var useArchive bool
|
||||
|
||||
func Init(jobArchivePath string, disableArchive bool) error {
|
||||
useArchive = !disableArchive
|
||||
JobArchivePath = jobArchivePath
|
||||
for _, cluster := range config.Clusters {
|
||||
if cluster.MetricDataRepository != nil {
|
||||
switch cluster.MetricDataRepository.Kind {
|
||||
case "cc-metric-store":
|
||||
ccms := &CCMetricStore{}
|
||||
if err := ccms.Init(cluster.MetricDataRepository.Url, cluster.MetricDataRepository.Token); err != nil {
|
||||
return err
|
||||
}
|
||||
metricDataRepos[cluster.Name] = ccms
|
||||
// case "influxdb-v2":
|
||||
// idb := &InfluxDBv2DataRepository{}
|
||||
// if err := idb.Init(cluster.MetricDataRepository.Url); err != nil {
|
||||
// return err
|
||||
// }
|
||||
// metricDataRepos[cluster.Name] = idb
|
||||
default:
|
||||
return fmt.Errorf("unkown metric data repository '%s' for cluster '%s'", cluster.MetricDataRepository.Kind, cluster.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Fetches the metric data for a job.
|
||||
func LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
|
||||
if job.State == model.JobStateRunning {
|
||||
return runningJobs.LoadData(job, metrics, ctx)
|
||||
}
|
||||
func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||
if job.State == schema.JobStateRunning || !useArchive {
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
if job.State != model.JobStateCompleted {
|
||||
return nil, fmt.Errorf("job of state '%s' is not supported", job.State)
|
||||
data, err := repo.LoadData(job, metrics, scopes, ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
calcStatisticsSeries(job, data)
|
||||
return data, nil
|
||||
}
|
||||
|
||||
data, err := loadFromArchive(job)
|
||||
@ -47,10 +90,58 @@ func LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.Job
|
||||
}
|
||||
|
||||
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
||||
func LoadAverages(job *model.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
|
||||
if job.State != model.JobStateCompleted {
|
||||
return errors.New("only completed jobs are supported")
|
||||
func LoadAverages(job *schema.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
|
||||
if job.State != schema.JobStateRunning && useArchive {
|
||||
return loadAveragesFromArchive(job, metrics, data)
|
||||
}
|
||||
|
||||
return loadAveragesFromArchive(job, metrics, data)
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
if !ok {
|
||||
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
stats, err := repo.LoadStats(job, metrics, ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i, m := range metrics {
|
||||
nodes, ok := stats[m]
|
||||
if !ok {
|
||||
data[i] = append(data[i], schema.NaN)
|
||||
continue
|
||||
}
|
||||
|
||||
sum := 0.0
|
||||
for _, node := range nodes {
|
||||
sum += node.Avg
|
||||
}
|
||||
data[i] = append(data[i], schema.Float(sum))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error) {
|
||||
repo, ok := metricDataRepos[clusterId]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no metric data repository configured for '%s'", clusterId)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, m := range config.GetClusterConfig(clusterId).MetricConfig {
|
||||
metrics = append(metrics, m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := repo.LoadNodeData(clusterId, metrics, nodes, from, to, ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
return nil, fmt.Errorf("the metric data repository for '%s' does not support this query", clusterId)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
115
rest-api.go
115
rest-api.go
@ -1,115 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// StartJobRequestBody is the JSON payload that startJob decodes from the
// request body when a new job is registered.
type StartJobRequestBody struct {
|
||||
// Stored in the `job_id` column.
JobId string `json:"job_id"`
|
||||
// Stored in the `user_id` column.
UserId string `json:"user_id"`
|
||||
// Decoded from the request, but not part of the INSERT done by startJob.
ProjectId string `json:"project_id"`
|
||||
// Must name a cluster known to config.GetClusterConfig, otherwise startJob
// rejects the request.
ClusterId string `json:"cluster_id"`
|
||||
// Stored unchanged in the `start_time` column.
StartTime int64 `json:"start_time"`
|
||||
// Stored comma-joined in `node_list`; the length is stored as `num_nodes`.
Nodes []string `json:"nodes"`
|
||||
// Stored unchanged in the `metadata` column.
Metadata string `json:"metadata"`
|
||||
}
|
||||
|
||||
// StartJobResponeBody is the JSON response written by startJob on success.
type StartJobResponeBody struct {
|
||||
// Row id reported by LastInsertId for the inserted job row.
DBID int64 `json:"db_id"`
|
||||
}
|
||||
|
||||
// StopJobRequestBody is the JSON payload that stopJob decodes from the
// request body. The job is identified either by DBID or, when DBID is absent,
// by the combination of JobId, ClusterId and StartTime.
type StopJobRequestBody struct {
|
||||
// Optional; when set, the job is looked up by `job.id` only.
DBID *int64 `json:"db_id"`
|
||||
JobId string `json:"job_id"`
|
||||
ClusterId string `json:"cluster_id"`
|
||||
StartTime int64 `json:"start_time"`
|
||||
|
||||
// Compared against the Unix start time of the job; stopJob rejects the
// request unless it is larger.
StopTime int64 `json:"stop_time"`
|
||||
}
|
||||
|
||||
func startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
req := StartJobRequestBody{}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if config.GetClusterConfig(req.ClusterId) == nil {
|
||||
http.Error(rw, fmt.Sprintf("cluster '%s' does not exist", req.ClusterId), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
res, err := db.Exec(
|
||||
`INSERT INTO job (job_id, user_id, cluster_id, start_time, duration, job_state, num_nodes, node_list, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);`,
|
||||
req.JobId, req.UserId, req.ClusterId, req.StartTime, 0, model.JobStateRunning, len(req.Nodes), strings.Join(req.Nodes, ","), req.Metadata)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("New job started (db-id=%d)\n", id)
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(StartJobResponeBody{
|
||||
DBID: id,
|
||||
})
|
||||
}
|
||||
|
||||
func stopJob(rw http.ResponseWriter, r *http.Request) {
|
||||
req := StopJobRequestBody{}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
var err error
|
||||
var job *model.Job
|
||||
if req.DBID != nil {
|
||||
job, err = graph.ScanJob(sq.Select(graph.JobTableCols...).From("job").Where("job.id = ?", req.DBID).RunWith(db).QueryRow())
|
||||
} else {
|
||||
job, err = graph.ScanJob(sq.Select(graph.JobTableCols...).From("job").
|
||||
Where("job.job_id = ?", req.JobId).
|
||||
Where("job.cluster_id = ?", req.ClusterId).
|
||||
Where("job.start_time = ?", req.StartTime).
|
||||
RunWith(db).QueryRow())
|
||||
}
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != model.JobStateRunning {
|
||||
http.Error(rw, "stop_time must be larger than start_time and only running jobs can be stopped", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
job.Duration = int(job.StartTime.Unix() - req.StopTime)
|
||||
if err := metricdata.ArchiveJob(job, r.Context()); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if _, err := db.Exec(`UPDATE job SET job.duration = ?, job.job_state = ? WHERE job.id = ?;`,
|
||||
job.Duration, model.JobStateCompleted, job.ID); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
}
|
150
schema/job.go
Normal file
150
schema/job.go
Normal file
@ -0,0 +1,150 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Common subset of Job and JobMeta. Use one of those, not
|
||||
// this type directly.
|
||||
// BaseJob carries the fields shared by Job and JobMeta. The `json` tags give
// the serialized key of each field and the `db` tags the matching column of
// the `job` table.
type BaseJob struct {
|
||||
JobID int64 `json:"jobId" db:"job_id"`
|
||||
User string `json:"user" db:"user"`
|
||||
Project string `json:"project" db:"project"`
|
||||
Cluster string `json:"cluster" db:"cluster"`
|
||||
Partition string `json:"partition" db:"partition"`
|
||||
ArrayJobId int32 `json:"arrayJobId" db:"array_job_id"`
|
||||
NumNodes int32 `json:"numNodes" db:"num_nodes"`
|
||||
NumHWThreads int32 `json:"numHwthreads" db:"num_hwthreads"`
|
||||
NumAcc int32 `json:"numAcc" db:"num_acc"`
|
||||
// JobDefaults sets this to 1.
Exclusive int32 `json:"exclusive" db:"exclusive"`
|
||||
// JobDefaults sets this to 1.
MonitoringStatus int32 `json:"monitoringStatus" db:"monitoring_status"`
|
||||
SMT int32 `json:"smt" db:"smt"`
|
||||
// One of the JobState* constants.
State JobState `json:"jobState" db:"job_state"`
|
||||
// ScanJob fills this in (in seconds since StartTime) for running jobs
// whose stored duration is 0.
Duration int32 `json:"duration" db:"duration"`
|
||||
// Has no `db` tag and no matching entry in JobColumns.
Tags []*Tag `json:"tags"`
|
||||
// Raw content of the `resources` column. Never serialized to JSON;
// ScanJob decodes it into Resources and then resets it to nil.
RawResources []byte `json:"-" db:"resources"`
|
||||
// Decoded form of RawResources; has no `db` tag of its own.
Resources []*Resource `json:"resources"`
|
||||
// JobDefaults sets this to the empty string.
MetaData interface{} `json:"metaData" db:"meta_data"`
|
||||
}
|
||||
|
||||
// This type is used as the GraphQL interface and using sqlx as a table row.
|
||||
// Job is a BaseJob extended by the database row id, a time.Time start time
// and a set of per-job metric values that are read from the database but
// excluded from JSON output (`json:"-"`).
type Job struct {
|
||||
// Value of the `id` column.
ID int64 `json:"id" db:"id"`
|
||||
BaseJob
|
||||
StartTime time.Time `json:"startTime" db:"start_time"`
|
||||
// The following fields map to columns that are not listed in JobColumns.
MemUsedMax float64 `json:"-" db:"mem_used_max"`
|
||||
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"`
|
||||
MemBwAvg float64 `json:"-" db:"mem_bw_avg"`
|
||||
LoadAvg float64 `json:"-" db:"load_avg"`
|
||||
NetBwAvg float64 `json:"-" db:"net_bw_avg"`
|
||||
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"`
|
||||
FileBwAvg float64 `json:"-" db:"file_bw_avg"`
|
||||
FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"`
|
||||
}
|
||||
|
||||
// When reading from the database or sending data via GraphQL, the start time can be in the much more
|
||||
// convenient time.Time type. In the `meta.json` files, the start time is encoded as a unix epoch timestamp.
|
||||
// This is why there is this struct, which contains all fields from the regular job struct, but "overwrites"
|
||||
// the StartTime field with one of type int64.
|
||||
// JobMeta is a BaseJob with the start time as an int64 plus optional
// per-job statistics.
type JobMeta struct {
|
||||
BaseJob
|
||||
StartTime int64 `json:"startTime" db:"start_time"`
|
||||
// Omitted from JSON when empty. Presumably keyed by metric name; confirm
// against the code that fills it.
Statistics map[string]JobStatistics `json:"statistics,omitempty"`
|
||||
}
|
||||
|
||||
// JobDefaults is the BaseJob value that ScanJob starts from before a row is
// scanned into the job. All fields not listed here keep their zero value.
var JobDefaults BaseJob = BaseJob{
|
||||
Exclusive: 1,
|
||||
MonitoringStatus: 1,
|
||||
MetaData: "",
|
||||
}
|
||||
|
||||
// JobColumns lists columns of the `job` table, each qualified with the table
// name. The column names correspond to the `db` tags of Job.ID, Job.StartTime
// and the BaseJob fields that carry a `db` tag.
var JobColumns []string = []string{
|
||||
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.start_time", "job.partition", "job.array_job_id", "job.num_nodes",
|
||||
"job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
|
||||
"job.duration", "job.resources", "job.meta_data",
|
||||
}
|
||||
|
||||
// Scannable is the only capability ScanJob needs from its argument: scanning
// the current row into a struct. Presumably satisfied by the sqlx row types;
// confirm against the callers.
type Scannable interface {
|
||||
StructScan(dest interface{}) error
|
||||
}
|
||||
|
||||
// Helper function for scanning jobs with the `jobTableCols` columns selected.
|
||||
func ScanJob(row Scannable) (*Job, error) {
|
||||
job := &Job{BaseJob: JobDefaults}
|
||||
if err := row.StructScan(job); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if job.Duration == 0 && job.State == JobStateRunning {
|
||||
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
||||
}
|
||||
|
||||
job.RawResources = nil
|
||||
return job, nil
|
||||
}
|
||||
|
||||
// JobStatistics holds a unit together with average, minimum and maximum
// values. It is the value type of JobMeta.Statistics.
type JobStatistics struct {
|
||||
Unit string `json:"unit"`
|
||||
Avg float64 `json:"avg"`
|
||||
Min float64 `json:"min"`
|
||||
Max float64 `json:"max"`
|
||||
}
|
||||
|
||||
// Tag is a typed, named label; BaseJob.Tags holds the tags of a job. The
// `db` tags map the fields to the `id`, `tag_type` and `tag_name` columns.
type Tag struct {
|
||||
ID int64 `json:"id" db:"id"`
|
||||
Type string `json:"type" db:"tag_type"`
|
||||
Name string `json:"name" db:"tag_name"`
|
||||
}
|
||||
|
||||
// Resource is one element of BaseJob.Resources, as decoded by ScanJob from
// the JSON stored in the `resources` column. Only Hostname is always written
// to JSON; the other fields are omitted when empty.
type Resource struct {
|
||||
Hostname string `json:"hostname"`
|
||||
HWThreads []int `json:"hwthreads,omitempty"`
|
||||
Accelerators []int `json:"accelerators,omitempty"`
|
||||
Configuration string `json:"configuration,omitempty"`
|
||||
}
|
||||
|
||||
// JobState is the state of a job, represented as a plain string. The only
// values accepted by Valid are the JobState* constants below.
type JobState string

const (
	JobStateRunning   JobState = "running"
	JobStateCompleted JobState = "completed"
	JobStateFailed    JobState = "failed"
	JobStateCanceled  JobState = "canceled"
	JobStateStopped   JobState = "stopped"
	JobStateTimeout   JobState = "timeout"
)

// UnmarshalGQL sets the receiver from v, which must be a string naming one
// of the JobState* constants. A non-string v leaves the receiver untouched.
// An unknown string is reported as an error, but the receiver has already
// been overwritten with it at that point.
func (e *JobState) UnmarshalGQL(v interface{}) error {
	raw, isString := v.(string)
	if !isString {
		return fmt.Errorf("enums must be strings")
	}

	*e = JobState(raw)
	if e.Valid() {
		return nil
	}

	return errors.New("invalid job state")
}

// MarshalGQL writes the state to w wrapped in double quotes. The value is
// written verbatim, without any escaping.
func (e JobState) MarshalGQL(w io.Writer) {
	fmt.Fprintf(w, "\"%s\"", e)
}

// Valid reports whether e equals one of the JobState* constants.
func (e JobState) Valid() bool {
	switch e {
	case JobStateRunning,
		JobStateCompleted,
		JobStateFailed,
		JobStateCanceled,
		JobStateStopped,
		JobStateTimeout:
		return true
	default:
		return false
	}
}
|
@ -5,39 +5,21 @@ import (
|
||||
"io"
|
||||
)
|
||||
|
||||
// Format of `data.json` files.
|
||||
type JobData map[string]*JobMetric
|
||||
type JobData map[string]map[MetricScope]*JobMetric
|
||||
|
||||
type JobMetric struct {
|
||||
Unit string `json:"unit"`
|
||||
Scope MetricScope `json:"scope"`
|
||||
Timestep int `json:"timestep"`
|
||||
Series []*MetricSeries `json:"series"`
|
||||
Unit string `json:"unit"`
|
||||
Scope MetricScope `json:"scope"`
|
||||
Timestep int `json:"timestep"`
|
||||
Series []Series `json:"series"`
|
||||
StatisticsSeries *StatsSeries `json:"statisticsSeries"`
|
||||
}
|
||||
|
||||
type MetricScope string
|
||||
|
||||
const (
|
||||
MetricScopeNode MetricScope = "node"
|
||||
MetricScopeSocket MetricScope = "socket"
|
||||
MetricScopeCpu MetricScope = "cpu"
|
||||
)
|
||||
|
||||
func (e *MetricScope) UnmarshalGQL(v interface{}) error {
|
||||
str, ok := v.(string)
|
||||
if !ok {
|
||||
return fmt.Errorf("enums must be strings")
|
||||
}
|
||||
|
||||
*e = MetricScope(str)
|
||||
if *e != "node" && *e != "socket" && *e != "cpu" {
|
||||
return fmt.Errorf("%s is not a valid MetricScope", str)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e MetricScope) MarshalGQL(w io.Writer) {
|
||||
fmt.Fprintf(w, "\"%s\"", e)
|
||||
type Series struct {
|
||||
Hostname string `json:"hostname"`
|
||||
Id *int `json:"id,omitempty"`
|
||||
Statistics *MetricStatistics `json:"statistics"`
|
||||
Data []Float `json:"data"`
|
||||
}
|
||||
|
||||
type MetricStatistics struct {
|
||||
@ -46,33 +28,51 @@ type MetricStatistics struct {
|
||||
Max float64 `json:"max"`
|
||||
}
|
||||
|
||||
type MetricSeries struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Statistics *MetricStatistics `json:"statistics"`
|
||||
Data []Float `json:"data"`
|
||||
type StatsSeries struct {
|
||||
Mean []Float `json:"mean"`
|
||||
Min []Float `json:"min"`
|
||||
Max []Float `json:"max"`
|
||||
Percentiles map[int][]Float `json:"percentiles,omitempty"`
|
||||
}
|
||||
|
||||
type JobMetaStatistics struct {
|
||||
Unit string `json:"unit"`
|
||||
Avg float64 `json:"avg"`
|
||||
Min float64 `json:"min"`
|
||||
Max float64 `json:"max"`
|
||||
type MetricScope string
|
||||
|
||||
const (
|
||||
MetricScopeNode MetricScope = "node"
|
||||
MetricScopeSocket MetricScope = "socket"
|
||||
MetricScopeCpu MetricScope = "cpu"
|
||||
MetricScopeHWThread MetricScope = "hwthread"
|
||||
)
|
||||
|
||||
var metricScopeGranularity map[MetricScope]int = map[MetricScope]int{
|
||||
MetricScopeNode: 1,
|
||||
MetricScopeSocket: 2,
|
||||
MetricScopeCpu: 3,
|
||||
MetricScopeHWThread: 4,
|
||||
}
|
||||
|
||||
// Format of `meta.json` files.
|
||||
type JobMeta struct {
|
||||
JobId string `json:"job_id"`
|
||||
UserId string `json:"user_id"`
|
||||
ProjectId string `json:"project_id"`
|
||||
ClusterId string `json:"cluster_id"`
|
||||
NumNodes int `json:"num_nodes"`
|
||||
JobState string `json:"job_state"`
|
||||
StartTime int64 `json:"start_time"`
|
||||
Duration int64 `json:"duration"`
|
||||
Nodes []string `json:"nodes"`
|
||||
Tags []struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
} `json:"tags"`
|
||||
Statistics map[string]*JobMetaStatistics `json:"statistics"`
|
||||
func (e *MetricScope) MaxGranularity(other MetricScope) MetricScope {
|
||||
a := metricScopeGranularity[*e]
|
||||
b := metricScopeGranularity[other]
|
||||
if a < b {
|
||||
return *e
|
||||
}
|
||||
return other
|
||||
}
|
||||
|
||||
func (e *MetricScope) UnmarshalGQL(v interface{}) error {
|
||||
str, ok := v.(string)
|
||||
if !ok {
|
||||
return fmt.Errorf("enums must be strings")
|
||||
}
|
||||
|
||||
*e = MetricScope(str)
|
||||
if _, ok := metricScopeGranularity[*e]; !ok {
|
||||
return fmt.Errorf("%s is not a valid MetricScope", str)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalGQL writes the scope to w wrapped in double quotes. The value is
// written verbatim, without any escaping, and a write error is not reported.
func (e MetricScope) MarshalGQL(w io.Writer) {
|
||||
fmt.Fprintf(w, "\"%s\"", e)
|
||||
}
|
||||
|
461
server.go
461
server.go
@ -3,19 +3,23 @@ package main
|
||||
import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
"strconv"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql/handler"
|
||||
"github.com/99designs/gqlgen/graphql/playground"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/api"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/auth"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/metricdata"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/schema"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/templates"
|
||||
"github.com/gorilla/handlers"
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/jmoiron/sqlx"
|
||||
@ -24,86 +28,423 @@ import (
|
||||
|
||||
var db *sqlx.DB
|
||||
|
||||
func main() {
|
||||
var reinitDB bool
|
||||
var port, staticFiles, jobDBFile string
|
||||
// Format of the configuration (file). See below for the defaults.
|
||||
type ProgramConfig struct {
|
||||
// Address where the http (or https) server will listen on (for example: 'localhost:80').
|
||||
Addr string `json:"addr"`
|
||||
|
||||
flag.StringVar(&port, "port", "8080", "Port on which to listen")
|
||||
flag.StringVar(&staticFiles, "static-files", "./frontend/public", "Directory who's contents shall be served as static files")
|
||||
flag.StringVar(&jobDBFile, "job-db", "./var/job.db", "SQLite 3 Jobs Database File")
|
||||
flag.BoolVar(&reinitDB, "init-db", false, "Initialize new SQLite Database")
|
||||
// Disable authentication (for everything: API, Web-UI, ...)
|
||||
DisableAuthentication bool `json:"disable-authentication"`
|
||||
|
||||
// Folder where static assets can be found, will be served directly
|
||||
StaticFiles string `json:"static-files"`
|
||||
|
||||
// Currently only SQLite3 is supported, so this should be a filename
|
||||
DB string `json:"db"`
|
||||
|
||||
// Path to the job-archive
|
||||
JobArchive string `json:"job-archive"`
|
||||
|
||||
// Make the /api/jobs/stop_job endpoint do the heavy work in the background.
|
||||
AsyncArchiving bool `json:"async-archive"`
|
||||
|
||||
// Keep all metric data in the metric data repositories,
|
||||
// do not write to the job-archive.
|
||||
DisableArchive bool `json:"disable-archive"`
|
||||
|
||||
// For LDAP Authentication and user synchronisation.
|
||||
LdapConfig *auth.LdapConfig `json:"ldap"`
|
||||
|
||||
// If both those options are not empty, use HTTPS using those certificates.
|
||||
HttpsCertFile string `json:"https-cert-file"`
|
||||
HttpsKeyFile string `json:"https-key-file"`
|
||||
|
||||
// If overwritten, at least all the options in the defaults below must
|
||||
// be provided! Most options here can be overwritten by the user.
|
||||
UiDefaults map[string]interface{} `json:"ui-defaults"`
|
||||
|
||||
// Where to store MachineState files
|
||||
MachineStateDir string `json:"machine-state-dir"`
|
||||
}
|
||||
|
||||
var programConfig ProgramConfig = ProgramConfig{
|
||||
Addr: "0.0.0.0:8080",
|
||||
DisableAuthentication: false,
|
||||
StaticFiles: "./frontend/public",
|
||||
DB: "./var/job.db",
|
||||
JobArchive: "./var/job-archive",
|
||||
AsyncArchiving: true,
|
||||
DisableArchive: false,
|
||||
LdapConfig: &auth.LdapConfig{
|
||||
Url: "ldap://localhost",
|
||||
UserBase: "ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
|
||||
SearchDN: "cn=admin,dc=rrze,dc=uni-erlangen,dc=de",
|
||||
UserBind: "uid={username},ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
|
||||
UserFilter: "(&(objectclass=posixAccount)(uid=*))",
|
||||
},
|
||||
HttpsCertFile: "",
|
||||
HttpsKeyFile: "",
|
||||
UiDefaults: map[string]interface{}{
|
||||
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
|
||||
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"},
|
||||
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"plot_general_colorBackground": true,
|
||||
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
|
||||
"plot_general_lineWidth": 1,
|
||||
"plot_list_jobsPerPage": 10,
|
||||
"plot_list_selectedMetrics": []string{"cpu_load", "mem_used", "flops_any", "mem_bw", "clock"},
|
||||
"plot_view_plotsPerRow": 4,
|
||||
"plot_view_showPolarplot": true,
|
||||
"plot_view_showRoofline": true,
|
||||
"plot_view_showStatTable": true,
|
||||
},
|
||||
MachineStateDir: "./var/machine-state",
|
||||
}
|
||||
|
||||
func main() {
|
||||
var flagReinitDB, flagStopImmediately, flagSyncLDAP bool
|
||||
var flagConfigFile string
|
||||
var flagNewUser, flagDelUser, flagGenJWT string
|
||||
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize `job`, `tag`, and `jobtag` tables")
|
||||
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the `user` table with ldap")
|
||||
flag.BoolVar(&flagStopImmediately, "no-server", false, "Do not start a server, stop right after initialization and argument handling")
|
||||
flag.StringVar(&flagConfigFile, "config", "", "Location of the config file for this server (overwrites the defaults)")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin|api]:<password>`")
|
||||
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by username")
|
||||
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by the username")
|
||||
flag.Parse()
|
||||
|
||||
var err error
|
||||
db, err = sqlx.Open("sqlite3", jobDBFile)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// See https://github.com/mattn/go-sqlite3/issues/274
|
||||
db.SetMaxOpenConns(1)
|
||||
defer db.Close()
|
||||
|
||||
if reinitDB {
|
||||
if err = initDB(db, metricdata.JobArchivePath); err != nil {
|
||||
if flagConfigFile != "" {
|
||||
data, err := os.ReadFile(flagConfigFile)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
if err := json.Unmarshal(data, &programConfig); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
config.Clusters, err = loadClusters()
|
||||
var err error
|
||||
// This might need to change for other databases:
|
||||
db, err = sqlx.Open("sqlite3", fmt.Sprintf("%s?_foreign_keys=on", programConfig.DB))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Only for sqlite, not needed for any other database:
|
||||
db.SetMaxOpenConns(1)
|
||||
|
||||
// Initialize sub-modules...
|
||||
|
||||
if !programConfig.DisableAuthentication {
|
||||
if err := auth.Init(db, programConfig.LdapConfig); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
if flagNewUser != "" {
|
||||
if err := auth.AddUserToDB(db, flagNewUser); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
if flagDelUser != "" {
|
||||
if err := auth.DelUserFromDB(db, flagDelUser); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if flagSyncLDAP {
|
||||
auth.SyncWithLDAP(db)
|
||||
}
|
||||
|
||||
if flagGenJWT != "" {
|
||||
user, err := auth.FetchUserFromDB(db, flagGenJWT)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
if !user.IsAPIUser {
|
||||
log.Println("warning: that user does not have the API role")
|
||||
}
|
||||
|
||||
jwt, err := auth.ProvideJWT(user)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
fmt.Printf("JWT for '%s': %s\n", user.Username, jwt)
|
||||
}
|
||||
} else if flagNewUser != "" || flagDelUser != "" {
|
||||
log.Fatalln("arguments --add-user and --del-user can only be used if authentication is enabled")
|
||||
}
|
||||
|
||||
if err := config.Init(db, !programConfig.DisableAuthentication, programConfig.UiDefaults, programConfig.JobArchive); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
if err := metricdata.Init(programConfig.JobArchive, programConfig.DisableArchive); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
if flagReinitDB {
|
||||
if err := initDB(db, programConfig.JobArchive); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if flagStopImmediately {
|
||||
return
|
||||
}
|
||||
|
||||
// Build routes...
|
||||
|
||||
resolver := &graph.Resolver{DB: db}
|
||||
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||
|
||||
// graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
||||
// switch e := err.(type) {
|
||||
// case string:
|
||||
// return fmt.Errorf("panic: %s", e)
|
||||
// case error:
|
||||
// return fmt.Errorf("panic caused by: %w", e)
|
||||
// }
|
||||
|
||||
// return errors.New("internal server error (panic)")
|
||||
// })
|
||||
|
||||
graphQLPlayground := playground.Handler("GraphQL playground", "/query")
|
||||
api := &api.RestApi{
|
||||
DB: db,
|
||||
AsyncArchiving: programConfig.AsyncArchiving,
|
||||
Resolver: resolver,
|
||||
MachineStateDir: programConfig.MachineStateDir,
|
||||
}
|
||||
|
||||
handleGetLogin := func(rw http.ResponseWriter, r *http.Request) {
|
||||
templates.Render(rw, r, "login.html", &templates.Page{
|
||||
Title: "Login",
|
||||
Login: &templates.LoginPage{},
|
||||
})
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
loggedRouter := handlers.LoggingHandler(os.Stdout, r)
|
||||
r.NotFoundHandler = http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
templates.Render(rw, r, "404.html", &templates.Page{
|
||||
Title: "Not found",
|
||||
})
|
||||
})
|
||||
|
||||
srv := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{
|
||||
Resolvers: &graph.Resolver{DB: db}}))
|
||||
r.HandleFunc("/graphql-playground", playground.Handler("GraphQL playground", "/query"))
|
||||
r.Handle("/query", srv)
|
||||
r.Handle("/playground", graphQLPlayground)
|
||||
r.Handle("/login", auth.Login(db)).Methods(http.MethodPost)
|
||||
r.HandleFunc("/login", handleGetLogin).Methods(http.MethodGet)
|
||||
r.HandleFunc("/logout", auth.Logout).Methods(http.MethodPost)
|
||||
|
||||
r.HandleFunc("/config.json", config.ServeConfig).Methods("GET")
|
||||
|
||||
r.HandleFunc("/api/start-job", startJob).Methods("POST")
|
||||
r.HandleFunc("/api/stop-job", stopJob).Methods("POST")
|
||||
|
||||
if len(staticFiles) != 0 {
|
||||
r.PathPrefix("/").Handler(http.FileServer(http.Dir(staticFiles)))
|
||||
secured := r.PathPrefix("/").Subrouter()
|
||||
if !programConfig.DisableAuthentication {
|
||||
secured.Use(auth.Auth)
|
||||
}
|
||||
secured.Handle("/query", graphQLEndpoint)
|
||||
|
||||
log.Printf("GraphQL playground: http://localhost:%s/graphql-playground", port)
|
||||
log.Printf("Home: http://localhost:%s/index.html", port)
|
||||
log.Fatal(http.ListenAndServe("127.0.0.1:"+port,
|
||||
handlers.CORS(handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization"}),
|
||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||
handlers.AllowedOrigins([]string{"*"}))(loggedRouter)))
|
||||
}
|
||||
|
||||
func loadClusters() ([]*model.Cluster, error) {
|
||||
entries, err := os.ReadDir(metricdata.JobArchivePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
clusters := []*model.Cluster{}
|
||||
for _, de := range entries {
|
||||
bytes, err := os.ReadFile(filepath.Join(metricdata.JobArchivePath, de.Name(), "cluster.json"))
|
||||
secured.HandleFunc("/", func(rw http.ResponseWriter, r *http.Request) {
|
||||
conf, err := config.GetUIConfig(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
var cluster model.Cluster
|
||||
if err := json.Unmarshal(bytes, &cluster); err != nil {
|
||||
return nil, err
|
||||
infos := map[string]interface{}{
|
||||
"clusters": config.Clusters,
|
||||
"username": "",
|
||||
"admin": true,
|
||||
}
|
||||
|
||||
if cluster.FilterRanges.StartTime.To.IsZero() {
|
||||
cluster.FilterRanges.StartTime.To = time.Unix(0, 0)
|
||||
if user := auth.GetUser(r.Context()); user != nil {
|
||||
infos["username"] = user.Username
|
||||
infos["admin"] = user.IsAdmin
|
||||
}
|
||||
|
||||
clusters = append(clusters, &cluster)
|
||||
templates.Render(rw, r, "home.html", &templates.Page{
|
||||
Title: "ClusterCockpit",
|
||||
Config: conf,
|
||||
Infos: infos,
|
||||
})
|
||||
})
|
||||
|
||||
monitoringRoutes(secured, resolver)
|
||||
api.MountRoutes(secured)
|
||||
|
||||
r.PathPrefix("/").Handler(http.FileServer(http.Dir(programConfig.StaticFiles)))
|
||||
handler := handlers.CORS(
|
||||
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization"}),
|
||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||
handlers.AllowedOrigins([]string{"*"}))(handlers.LoggingHandler(os.Stdout, handlers.CompressHandler(r)))
|
||||
|
||||
// Start http or https server
|
||||
if programConfig.HttpsCertFile != "" && programConfig.HttpsKeyFile != "" {
|
||||
log.Printf("HTTPS server running at %s...", programConfig.Addr)
|
||||
err = http.ListenAndServeTLS(programConfig.Addr, programConfig.HttpsCertFile, programConfig.HttpsKeyFile, handler)
|
||||
} else {
|
||||
log.Printf("HTTP server running at %s...", programConfig.Addr)
|
||||
err = http.ListenAndServe(programConfig.Addr, handler)
|
||||
}
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
func monitoringRoutes(router *mux.Router, resolver *graph.Resolver) {
|
||||
buildFilterPresets := func(query url.Values) map[string]interface{} {
|
||||
filterPresets := map[string]interface{}{}
|
||||
|
||||
if query.Get("cluster") != "" {
|
||||
filterPresets["cluster"] = query.Get("cluster")
|
||||
}
|
||||
if query.Get("project") != "" {
|
||||
filterPresets["project"] = query.Get("project")
|
||||
}
|
||||
if query.Get("state") != "" && schema.JobState(query.Get("state")).Valid() {
|
||||
filterPresets["state"] = query.Get("state")
|
||||
}
|
||||
if rawtags, ok := query["tag"]; ok {
|
||||
tags := make([]int, len(rawtags))
|
||||
for i, tid := range rawtags {
|
||||
var err error
|
||||
tags[i], err = strconv.Atoi(tid)
|
||||
if err != nil {
|
||||
tags[i] = -1
|
||||
}
|
||||
}
|
||||
filterPresets["tags"] = tags
|
||||
}
|
||||
|
||||
return filterPresets
|
||||
}
|
||||
|
||||
return clusters, nil
|
||||
router.HandleFunc("/monitoring/jobs/", func(rw http.ResponseWriter, r *http.Request) {
|
||||
conf, err := config.GetUIConfig(r)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
templates.Render(rw, r, "monitoring/jobs.html", &templates.Page{
|
||||
Title: "Jobs - ClusterCockpit",
|
||||
Config: conf,
|
||||
FilterPresets: buildFilterPresets(r.URL.Query()),
|
||||
})
|
||||
})
|
||||
|
||||
router.HandleFunc("/monitoring/job/{id:[0-9]+}", func(rw http.ResponseWriter, r *http.Request) {
|
||||
conf, err := config.GetUIConfig(r)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
id := mux.Vars(r)["id"]
|
||||
job, err := resolver.Query().Job(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
templates.Render(rw, r, "monitoring/job.html", &templates.Page{
|
||||
Title: fmt.Sprintf("Job %d - ClusterCockpit", job.JobID),
|
||||
Config: conf,
|
||||
Infos: map[string]interface{}{
|
||||
"id": id,
|
||||
"jobId": job.JobID,
|
||||
"clusterId": job.Cluster,
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
router.HandleFunc("/monitoring/users/", func(rw http.ResponseWriter, r *http.Request) {
|
||||
conf, err := config.GetUIConfig(r)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
templates.Render(rw, r, "monitoring/users.html", &templates.Page{
|
||||
Title: "Users - ClusterCockpit",
|
||||
Config: conf,
|
||||
})
|
||||
})
|
||||
|
||||
router.HandleFunc("/monitoring/user/{id}", func(rw http.ResponseWriter, r *http.Request) {
|
||||
conf, err := config.GetUIConfig(r)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
id := mux.Vars(r)["id"]
|
||||
// TODO: One could check if the user exists, but that would be unhelpfull if authentication
|
||||
// is disabled or the user does not exist but has started jobs.
|
||||
|
||||
templates.Render(rw, r, "monitoring/user.html", &templates.Page{
|
||||
Title: fmt.Sprintf("User %s - ClusterCockpit", id),
|
||||
Config: conf,
|
||||
Infos: map[string]interface{}{"username": id},
|
||||
FilterPresets: buildFilterPresets(r.URL.Query()),
|
||||
})
|
||||
})
|
||||
|
||||
router.HandleFunc("/monitoring/analysis/", func(rw http.ResponseWriter, r *http.Request) {
|
||||
conf, err := config.GetUIConfig(r)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
filterPresets := map[string]interface{}{}
|
||||
query := r.URL.Query()
|
||||
if query.Get("cluster") != "" {
|
||||
filterPresets["clusterId"] = query.Get("cluster")
|
||||
}
|
||||
|
||||
templates.Render(rw, r, "monitoring/analysis.html", &templates.Page{
|
||||
Title: "Analysis View - ClusterCockpit",
|
||||
Config: conf,
|
||||
FilterPresets: filterPresets,
|
||||
})
|
||||
})
|
||||
|
||||
router.HandleFunc("/monitoring/systems/", func(rw http.ResponseWriter, r *http.Request) {
|
||||
conf, err := config.GetUIConfig(r)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
filterPresets := map[string]interface{}{}
|
||||
query := r.URL.Query()
|
||||
if query.Get("cluster") != "" {
|
||||
filterPresets["clusterId"] = query.Get("cluster")
|
||||
}
|
||||
|
||||
templates.Render(rw, r, "monitoring/systems.html", &templates.Page{
|
||||
Title: "System View - ClusterCockpit",
|
||||
Config: conf,
|
||||
FilterPresets: filterPresets,
|
||||
})
|
||||
})
|
||||
|
||||
router.HandleFunc("/monitoring/node/{clusterId}/{nodeId}", func(rw http.ResponseWriter, r *http.Request) {
|
||||
conf, err := config.GetUIConfig(r)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
vars := mux.Vars(r)
|
||||
templates.Render(rw, r, "monitoring/node.html", &templates.Page{
|
||||
Title: fmt.Sprintf("Node %s - ClusterCockpit", vars["nodeId"]),
|
||||
Config: conf,
|
||||
Infos: map[string]interface{}{
|
||||
"nodeId": vars["nodeId"],
|
||||
"clusterId": vars["clusterId"],
|
||||
},
|
||||
})
|
||||
})
|
||||
}
|
||||
|
10
templates/404.html
Normal file
10
templates/404.html
Normal file
@ -0,0 +1,10 @@
|
||||
{{/* Error page shown for unknown routes; fills the "content" block of base.html. */}}
{{template "base.html" .}}
{{define "content"}}
    <div class="row">
        <div class="col">
            <div class="alert alert-danger" role="alert">
                404: Not found
            </div>
        </div>
    </div>
{{end}}
|
28
templates/base.html
Normal file
28
templates/base.html
Normal file
@ -0,0 +1,28 @@
|
||||
<!DOCTYPE html>
<html lang="en">
    <head>
        {{/* Shared page skeleton: pages override the "stylesheets", "content" and "javascript" blocks. */}}
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width,initial-scale=1">
        <title>{{.Title}}</title>

        <link rel="icon" type="image/png" href="/favicon.png">
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.1/dist/css/bootstrap.min.css" integrity="sha384-F3w7mX95PdgyTmZZMECAngseQB83DfGTowi0iMjiWaeVhAn4FJkqJByhZMI3AhiU" crossorigin="anonymous">
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.4.1/font/bootstrap-icons.css">
        <link rel="stylesheet" href="/global.css">
        <link rel="stylesheet" href="/uPlot.min.css">

        {{block "stylesheets" .}}{{end}}
    </head>
    <body>
        <div class="container">
            <div class="row">
                <div class="col">
                    {{block "content" .}}
                        Whoops, you should not see this...
                    {{end}}
                </div>
            </div>
        </div>
        {{block "javascript" .}}{{end}}
    </body>
</html>
|
57
templates/home.html
Normal file
57
templates/home.html
Normal file
@ -0,0 +1,57 @@
|
||||
{{/* Landing page: current user, logout button, navigation links and one table row per cluster. */}}
{{define "content"}}
    <div class="row">
        <div class="col">
            {{if .Infos.username}}
                <i class="bi bi-person-circle"></i> {{ .Infos.username }}
                {{if .Infos.admin}}
                    <span class="badge bg-primary">Admin</span>
                {{end}}
            {{end}}
        </div>
        <div class="col" style="text-align: right;">
            <form method="post" action="/logout">
                <button type="submit" class="btn btn-primary">Logout</button>
            </form>
        </div>
    </div>
    <div class="row">
        <div class="col-4">
            <ul>
                {{if .Infos.admin}}
                    <li><a href="/monitoring/jobs/">All jobs</a></li>
                    <li><a href="/monitoring/users/">All users</a></li>
                {{else}}
                    <li><a href="/monitoring/jobs/">My jobs</a></li>
                    <li><a href="/monitoring/user/{{.Infos.username}}">My user view</a></li>
                {{end}}
            </ul>
        </div>
        <div class="col-8">
            <h2>Clusters</h2>
            <table class="table">
                <thead>
                    <tr>
                        <th>Name</th>
                        <th>Jobs</th>
                        <th>System View</th>
                        <th>Analysis View</th>
                    </tr>
                </thead>
                <tbody>
                    {{range .Infos.clusters}}
                        <tr>
                            <td>{{.Name}}</td>
                            <td><a href="/monitoring/jobs/?cluster={{.Name}}">Jobs</a></td>
                            <td><a href="/monitoring/systems/?cluster={{.Name}}">System View</a></td>
                            <td><a href="/monitoring/analysis/?cluster={{.Name}}">Analysis View</a></td>
                        </tr>
                    {{end}}
                </tbody>
            </table>
        </div>
    </div>
{{end}}
|
47
templates/login.html
Normal file
47
templates/login.html
Normal file
@ -0,0 +1,47 @@
|
||||
{{/* Login form; .Login.Error and .Login.Info are shown above the form when non-empty. */}}
{{define "content"}}
    <div class="row">
        <div class="col">
            <h1>
                ClusterCockpit Login
            </h1>
        </div>
    </div>
    <div class="row">
        <div class="col">
            {{with .Login.Error}}
                <div class="alert alert-warning" role="alert">
                    {{.}}
                </div>
            {{end}}

            {{with .Login.Info}}
                <div class="alert alert-success" role="alert">
                    {{.}}
                </div>
            {{end}}
        </div>
    </div>
    <div class="row">
        <div class="col">
            <form method="post" action="/login">
                <div class="mb-3">
                    <label class="form-label" for="username">Username</label>
                    <input class="form-control" type="text" id="username" name="username">
                </div>
                <div class="mb-3">
                    <label class="form-label" for="password">Password</label>
                    <input class="form-control" type="password" id="password" name="password">
                </div>
                <button type="submit" class="btn btn-primary">Login</button>
            </form>
        </div>
    </div>
    <br/>
    <div class="row">
        <div class="col">
            <form method="post" action="/logout">
                <button type="submit" class="btn btn-primary">Logout</button>
            </form>
        </div>
    </div>
{{end}}
|
18
templates/monitoring/analysis.html
Normal file
18
templates/monitoring/analysis.html
Normal file
@ -0,0 +1,18 @@
|
||||
{{/* Analysis view: mounts the Svelte app and passes it the filter presets and the config keys listed below. */}}
{{define "stylesheets"}}
    <link rel="stylesheet" href="/build/analysis.css">
{{end}}

{{define "content"}}
    <div id="svelte-app"></div>
{{end}}

{{define "javascript"}}
    <script>
        const filterPresets = {{ .FilterPresets }};
        const clusterCockpitConfigPromise = Promise.resolve({
            plot_view_plotsPerRow: {{ .Config.plot_view_plotsPerRow }},
            analysis_view_histogramMetrics: {{ .Config.analysis_view_histogramMetrics }},
            analysis_view_scatterPlotMetrics: {{ .Config.analysis_view_scatterPlotMetrics }}
        });
    </script>
    <script src="/build/analysis.js"></script>
{{end}}
|
29
templates/monitoring/job.html
Normal file
29
templates/monitoring/job.html
Normal file
@ -0,0 +1,29 @@
|
||||
{{/* Single job view: mounts the Svelte app and passes it the job identifiers and plot configuration. */}}
{{define "content"}}
    <div id="svelte-app"></div>
{{end}}

{{define "stylesheets"}}
    <link rel='stylesheet' href='/build/job.css'>
{{end}}
{{define "javascript"}}
    <script>
        const jobInfos = {
            id: "{{ .Infos.id }}",
            jobId: "{{ .Infos.jobId }}",
            clusterId: "{{ .Infos.clusterId }}"
        };
        const clusterCockpitConfigPromise = Promise.resolve({
            plot_general_colorscheme: {{ .Config.plot_general_colorscheme }},
            plot_general_lineWidth: {{ .Config.plot_general_lineWidth }},
            plot_general_colorBackground: {{ .Config.plot_general_colorBackground }},
            plot_view_showRoofline: {{ .Config.plot_view_showRoofline }},
            plot_view_showPolarplot: {{ .Config.plot_view_showPolarplot }},
            plot_view_showStatTable: {{ .Config.plot_view_showStatTable }},
            plot_view_plotsPerRow: {{ .Config.plot_view_plotsPerRow }},
            job_view_selectedMetrics: {{ .Config.job_view_selectedMetrics }},
            job_view_nodestats_selectedMetrics: {{ .Config.job_view_nodestats_selectedMetrics }},
            {{/* The config key is job_view_polarPlotMetrics; there is no plot_view_polarPlotMetrics default. */}}
            job_view_polarPlotMetrics: {{ .Config.job_view_polarPlotMetrics }},
        });
    </script>
    <script src='/build/job.js'></script>
{{end}}
|
14
templates/monitoring/jobs.html
Normal file
14
templates/monitoring/jobs.html
Normal file
@ -0,0 +1,14 @@
|
||||
{{/* Job list view: mounts the Svelte app and passes it the filter presets and the full UI config. */}}
{{define "stylesheets"}}
    <link rel="stylesheet" href="/build/jobs.css">
{{end}}

{{define "content"}}
    <div id="svelte-app"></div>
{{end}}

{{define "javascript"}}
    <script>
        const filterPresets = {{ .FilterPresets }};
        const clusterCockpitConfig = {{ .Config }};
    </script>
    <script src="/build/jobs.js"></script>
{{end}}
|
21
templates/monitoring/node.html
Normal file
21
templates/monitoring/node.html
Normal file
@ -0,0 +1,21 @@
|
||||
{{/* Single node view: mounts the Svelte app and passes it the node/cluster ids and the general plot config. */}}
{{define "stylesheets"}}
    <link rel="stylesheet" href="/build/node.css">
{{end}}

{{define "content"}}
    <div id="svelte-app"></div>
{{end}}

{{define "javascript"}}
    <script>
        const nodeInfos = {
            nodeId: "{{ .Infos.nodeId }}",
            clusterId: "{{ .Infos.clusterId }}"
        };
        const clusterCockpitConfigPromise = Promise.resolve({
            plot_general_colorscheme: {{ .Config.plot_general_colorscheme }},
            plot_general_lineWidth: {{ .Config.plot_general_lineWidth }},
            plot_general_colorBackground: {{ .Config.plot_general_colorBackground }},
        });
    </script>
    <script src="/build/node.js"></script>
{{end}}
|
19
templates/monitoring/systems.html
Normal file
19
templates/monitoring/systems.html
Normal file
@ -0,0 +1,19 @@
|
||||
{{/* System view: mounts the Svelte app and passes it the filter presets and the general plot config. */}}
{{define "stylesheets"}}
    <link rel="stylesheet" href="/build/systems.css">
{{end}}

{{define "content"}}
    <div id="svelte-app"></div>
{{end}}

{{define "javascript"}}
    <script>
        const filterPresets = {{ .FilterPresets }};
        const clusterCockpitConfigPromise = Promise.resolve({
            plot_view_plotsPerRow: {{ .Config.plot_view_plotsPerRow }},
            plot_general_colorscheme: {{ .Config.plot_general_colorscheme }},
            plot_general_lineWidth: {{ .Config.plot_general_lineWidth }},
            plot_general_colorBackground: {{ .Config.plot_general_colorBackground }},
        });
    </script>
    <script src="/build/systems.js"></script>
{{end}}
|
15
templates/monitoring/user.html
Normal file
15
templates/monitoring/user.html
Normal file
@ -0,0 +1,15 @@
|
||||
{{/* Single user view: mounts the Svelte app and passes it the user infos, filter presets and the full UI config. */}}
{{define "stylesheets"}}
    <link rel="stylesheet" href="/build/user.css">
{{end}}

{{define "content"}}
    <div id="svelte-app"></div>
{{end}}

{{define "javascript"}}
    <script>
        const userInfos = {{ .Infos }};
        const filterPresets = {{ .FilterPresets }};
        const clusterCockpitConfig = {{ .Config }};
    </script>
    <script src="/build/user.js"></script>
{{end}}
|
14
templates/monitoring/users.html
Normal file
14
templates/monitoring/users.html
Normal file
@ -0,0 +1,14 @@
|
||||
{{/* User list view: mounts the Svelte app; it gets no filter presets and an empty config object. */}}
{{define "stylesheets"}}
    <link rel="stylesheet" href="/build/users.css">
{{end}}

{{define "content"}}
    <div id="svelte-app"></div>
{{end}}

{{define "javascript"}}
    <script>
        const filterPresets = null;
        const clusterCockpitConfigPromise = Promise.resolve({});
    </script>
    <script src="/build/users.js"></script>
{{end}}
|
56
templates/templates.go
Normal file
56
templates/templates.go
Normal file
@ -0,0 +1,56 @@
|
||||
package templates
|
||||
|
||||
import (
	"bytes"
	"html/template"
	"log"
	"net/http"
)
|
||||
|
||||
var (
	// templatesDir is the directory prefix for all template files; set in init.
	templatesDir string

	// debugMode makes Render re-parse the template files on every call.
	debugMode = true

	// templates maps a template file name (relative to templatesDir) to its
	// parsed form, which already includes base.html.
	templates = map[string]*template.Template{}
)
|
||||
|
||||
type (
	// Page is the data passed to every template. Which of the optional
	// fields are read depends on the individual template.
	Page struct {
		Title         string                 // inserted into <title> by base.html
		Login         *LoginPage             // read by login.html
		FilterPresets map[string]interface{} // handed to the frontend as `filterPresets`
		Infos         map[string]interface{} // page specific values
		Config        map[string]interface{} // UI configuration values
	}

	// LoginPage carries the messages shown above the login form.
	LoginPage struct {
		Error string
		Info  string
	}
)
|
||||
|
||||
func init() {
|
||||
templatesDir = "./templates/"
|
||||
base := template.Must(template.ParseFiles(templatesDir + "base.html"))
|
||||
files := []string{
|
||||
"home.html", "404.html", "login.html",
|
||||
"monitoring/jobs.html", "monitoring/job.html",
|
||||
"monitoring/users.html", "monitoring/user.html",
|
||||
"monitoring/analysis.html",
|
||||
"monitoring/systems.html",
|
||||
"monitoring/node.html",
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
templates[file] = template.Must(template.Must(base.Clone()).ParseFiles(templatesDir + file))
|
||||
}
|
||||
}
|
||||
|
||||
func Render(rw http.ResponseWriter, r *http.Request, file string, page *Page) {
|
||||
t, ok := templates[file]
|
||||
if !ok {
|
||||
panic("templates must be predefinied!")
|
||||
}
|
||||
|
||||
if debugMode {
|
||||
t = template.Must(template.ParseFiles(templatesDir+"base.html", templatesDir+file))
|
||||
}
|
||||
|
||||
if err := t.Execute(rw, page); err != nil {
|
||||
log.Printf("template error: %s\n", err.Error())
|
||||
}
|
||||
}
|
40
utils/add-job.mjs
Normal file
40
utils/add-job.mjs
Normal file
@ -0,0 +1,40 @@
|
||||
import fetch from 'node-fetch';

// Just for testing: posts one hard-coded running job to a locally running
// server and prints the response.

const job = {
  jobId: 123,
  user: 'lou',
  project: 'testproj',
  cluster: 'heidi',
  partition: 'default',
  arrayJobId: 0,
  numNodes: 1,
  numHwthreads: 8,
  numAcc: 0,
  exclusive: 1,
  monitoringStatus: 1,
  smt: 1,
  jobState: 'running',
  duration: 2 * 60 * 60,
  tags: [],
  resources: [
    {
      hostname: 'heidi',
      hwthreads: [0, 1, 2, 3, 4, 5, 6, 7],
    },
  ],
  metaData: null,
  startTime: 1641427200,
};

fetch('http://localhost:8080/api/jobs/start_job/', {
  method: 'POST',
  body: JSON.stringify(job),
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJpc19hZG1pbiI6dHJ1ZSwiaXNfYXBpIjpmYWxzZSwic3ViIjoibG91In0.nY6dCgLSdm7zXz1xPkrb_3JnnUCgExXeXcrTlAAySs4p72VKJhmzzC1RxgkJE26l8tDYUilM-o-urzlaqK5aDA',
  },
})
  // Only a 200 response is parsed as JSON; anything else is printed as text.
  .then((res) => (res.status === 200 ? res.json() : res.text()))
  .then((res) => console.log(res))
  .catch((err) => {
    // E.g. the server is not running or the 200 body is not valid JSON.
    console.error('request failed:', err);
    process.exitCode = 1;
  });
|
@ -1 +0,0 @@
|
||||
{"analysis_view_histogramMetrics":["flops_any","mem_bw","mem_used"],"analysis_view_scatterPlotMetrics":[["flops_any","mem_bw"],["flops_any","cpu_load"],["cpu_load","mem_bw"]],"job_view_nodestats_selectedMetrics":["flops_any","mem_bw","mem_used"],"job_view_polarPlotMetrics":["flops_any","mem_bw","mem_used","net_bw","file_bw"],"job_view_selectedMetrics":["flops_any","mem_bw","mem_used"],"plot_general_colorBackground":true,"plot_general_colorscheme":["#00bfff","#0000ff","#ff00ff","#ff0000","#ff8000","#ffff00","#80ff00"],"plot_general_lineWidth":1,"plot_list_jobsPerPage":10,"plot_list_selectedMetrics":["cpu_load","mem_used","flops_any","mem_bw","clock"],"plot_view_plotsPerRow":4,"plot_view_showPolarplot":true,"plot_view_showRoofline":true,"plot_view_showStatTable":true}
|
Loading…
Reference in New Issue
Block a user