Mirror of https://github.com/ClusterCockpit/cc-backend (synced 2025-07-23 04:51:39 +02:00)
all schemas new
(One file diff in this commit is suppressed because it is too large.)
@@ -1,26 +1,17 @@
 package model

 // Go look at `gqlgen.yml` and the schema package for other non-generated models.

-type JobTag struct {
-    ID      string `json:"id" db:"id"`
-    TagType string `json:"tagType" db:"tag_type"`
-    TagName string `json:"tagName" db:"tag_name"`
-}
-
 type Cluster struct {
-    ClusterID            string          `json:"clusterID"`
-    ProcessorType        string          `json:"processorType"`
-    SocketsPerNode       int             `json:"socketsPerNode"`
-    CoresPerSocket       int             `json:"coresPerSocket"`
-    ThreadsPerCore       int             `json:"threadsPerCore"`
-    FlopRateScalar       int             `json:"flopRateScalar"`
-    FlopRateSimd         int             `json:"flopRateSimd"`
-    MemoryBandwidth      int             `json:"memoryBandwidth"`
-    MetricConfig         []*MetricConfig `json:"metricConfig"`
-    FilterRanges         *FilterRanges   `json:"filterRanges"`
-    MetricDataRepository *struct {
-        Kind string `json:"kind"`
-        Url  string `json:"url"`
-    } `json:"metricDataRepository"`
+    Name         string          `json:"name"`
+    MetricConfig []*MetricConfig `json:"metricConfig"`
+    FilterRanges *FilterRanges   `json:"filterRanges"`
+    Partitions   []*Partition    `json:"partitions"`
+
+    // NOT part of the API:
+    MetricDataRepository *MetricDataRepository `json:"metricDataRepository"`
+}
+
+type MetricDataRepository struct {
+    Kind  string `json:"kind"`
+    Url   string `json:"url"`
+    Token string `json:"token"`
 }
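The reworked Cluster is now a plain metadata container: the per-node topology moves into Partition, and the metric repository (including its token) is marked as internal. A minimal sketch of decoding such a cluster description from JSON; the filename and the standalone program are illustrative assumptions, not part of this commit:

package main

import (
    "encoding/json"
    "fmt"
    "log"
    "os"

    "github.com/ClusterCockpit/cc-jobarchive/graph/model"
)

func main() {
    // "cluster.json" is an assumed example filename.
    f, err := os.Open("cluster.json")
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    // The json struct tags on model.Cluster drive this decoding.
    var c model.Cluster
    if err := json.NewDecoder(f).Decode(&c); err != nil {
        log.Fatal(err)
    }
    fmt.Println(c.Name, len(c.Partitions))
}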
@@ -11,6 +11,12 @@ import (
     "github.com/ClusterCockpit/cc-jobarchive/schema"
 )

+type Accelerator struct {
+    ID    string `json:"id"`
+    Type  string `json:"type"`
+    Model string `json:"model"`
+}
+
 type FilterRanges struct {
     Duration *IntRangeOutput `json:"duration"`
     NumNodes *IntRangeOutput `json:"numNodes"`
@@ -37,59 +43,43 @@ type IntRangeOutput struct {
     To   int `json:"to"`
 }

-type Job struct {
-    ID               string                `json:"Id"`
-    JobID            int                   `json:"JobId"`
-    User             string                `json:"User"`
-    Project          string                `json:"Project"`
-    Cluster          string                `json:"Cluster"`
-    StartTime        time.Time             `json:"StartTime"`
-    Duration         int                   `json:"Duration"`
-    NumNodes         int                   `json:"NumNodes"`
-    NumHWThreads     int                   `json:"NumHWThreads"`
-    NumAcc           int                   `json:"NumAcc"`
-    Smt              int                   `json:"SMT"`
-    Exclusive        int                   `json:"Exclusive"`
-    Partition        string                `json:"Partition"`
-    ArrayJobID       int                   `json:"ArrayJobId"`
-    MonitoringStatus int                   `json:"MonitoringStatus"`
-    State            JobState              `json:"State"`
-    Tags             []*JobTag             `json:"Tags"`
-    Resources        []*schema.JobResource `json:"Resources"`
-    LoadAvg          *float64              `json:"LoadAvg"`
-    MemUsedMax       *float64              `json:"MemUsedMax"`
-    FlopsAnyAvg      *float64              `json:"FlopsAnyAvg"`
-    MemBwAvg         *float64              `json:"MemBwAvg"`
-    NetBwAvg         *float64              `json:"NetBwAvg"`
-    FileBwAvg        *float64              `json:"FileBwAvg"`
-}
-
 type JobFilter struct {
-    Tags        []string     `json:"tags"`
-    JobID       *StringInput `json:"jobId"`
-    User        *StringInput `json:"user"`
-    Project     *StringInput `json:"project"`
-    Cluster     *StringInput `json:"cluster"`
-    Duration    *IntRange    `json:"duration"`
-    NumNodes    *IntRange    `json:"numNodes"`
-    StartTime   *TimeRange   `json:"startTime"`
-    JobState    []JobState   `json:"jobState"`
-    FlopsAnyAvg *FloatRange  `json:"flopsAnyAvg"`
-    MemBwAvg    *FloatRange  `json:"memBwAvg"`
-    LoadAvg     *FloatRange  `json:"loadAvg"`
-    MemUsedMax  *FloatRange  `json:"memUsedMax"`
+    Tags        []string          `json:"tags"`
+    JobID       *StringInput      `json:"jobId"`
+    User        *StringInput      `json:"user"`
+    Project     *StringInput      `json:"project"`
+    Cluster     *StringInput      `json:"cluster"`
+    Duration    *IntRange         `json:"duration"`
+    NumNodes    *IntRange         `json:"numNodes"`
+    StartTime   *TimeRange        `json:"startTime"`
+    State       []schema.JobState `json:"state"`
+    FlopsAnyAvg *FloatRange       `json:"flopsAnyAvg"`
+    MemBwAvg    *FloatRange       `json:"memBwAvg"`
+    LoadAvg     *FloatRange       `json:"loadAvg"`
+    MemUsedMax  *FloatRange       `json:"memUsedMax"`
 }

 type JobMetricWithName struct {
-    Name   string            `json:"name"`
-    Metric *schema.JobMetric `json:"metric"`
+    Name         string            `json:"name"`
+    Node         *schema.JobMetric `json:"node"`
+    Socket       *schema.JobMetric `json:"socket"`
+    MemoryDomain *schema.JobMetric `json:"memoryDomain"`
+    Core         *schema.JobMetric `json:"core"`
+    Hwthread     *schema.JobMetric `json:"hwthread"`
+}
+
+type JobResource struct {
+    Hostname      string  `json:"hostname"`
+    Hwthreads     []int   `json:"hwthreads"`
+    Accelerators  []int   `json:"accelerators"`
+    Configuration *string `json:"configuration"`
 }

 type JobResultList struct {
-    Items  []*Job `json:"items"`
-    Offset *int   `json:"offset"`
-    Limit  *int   `json:"limit"`
-    Count  *int   `json:"count"`
+    Items  []*schema.Job `json:"items"`
+    Offset *int          `json:"offset"`
+    Limit  *int          `json:"limit"`
+    Count  *int          `json:"count"`
 }

 type JobsStatistics struct {
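All filter fields are pointers or slices, so nil cleanly encodes "no constraint". A short sketch of building a filter against the renamed State field; it assumes gqlgen generates model.IntRange with plain From/To ints from the IntRange input in the schema below, and that JobState string values such as "running" now live in the schema package:

filter := &model.JobFilter{
    State:    []schema.JobState{schema.JobState("running")},
    NumNodes: &model.IntRange{From: 2, To: 64},
    // JobID, User, Project, ... left nil: no constraint on those columns.
}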
@@ -103,14 +93,14 @@ type JobsStatistics struct {
 }

 type MetricConfig struct {
-    Name     string `json:"Name"`
-    Unit     string `json:"Unit"`
-    Timestep int    `json:"Timestep"`
-    Peak     int    `json:"Peak"`
-    Normal   int    `json:"Normal"`
-    Caution  int    `json:"Caution"`
-    Alert    int    `json:"Alert"`
-    Scope    string `json:"Scope"`
+    Name     string  `json:"name"`
+    Unit     string  `json:"unit"`
+    Scope    string  `json:"scope"`
+    Timestep int     `json:"timestep"`
+    Peak     float64 `json:"Peak"`
+    Normal   float64 `json:"Normal"`
+    Caution  float64 `json:"Caution"`
+    Alert    float64 `json:"Alert"`
 }

 type MetricFootprints struct {
@@ -138,6 +128,18 @@ type PageRequest struct {
     Page         int `json:"page"`
 }

+type Partition struct {
+    Name            string    `json:"name"`
+    ProcessorType   string    `json:"processorType"`
+    SocketsPerNode  int       `json:"socketsPerNode"`
+    CoresPerSocket  int       `json:"coresPerSocket"`
+    ThreadsPerCore  int       `json:"threadsPerCore"`
+    FlopRateScalar  int       `json:"flopRateScalar"`
+    FlopRateSimd    int       `json:"flopRateSimd"`
+    MemoryBandwidth int       `json:"memoryBandwidth"`
+    Topology        *Topology `json:"topology"`
+}
+
 type StringInput struct {
     Eq       *string `json:"eq"`
     Contains *string `json:"contains"`
@@ -155,6 +157,15 @@ type TimeRangeOutput struct {
     To   time.Time `json:"to"`
 }

+type Topology struct {
+    Node         []int          `json:"node"`
+    Socket       [][]int        `json:"socket"`
+    MemoryDomain [][]int        `json:"memoryDomain"`
+    Die          [][]int        `json:"die"`
+    Core         [][]int        `json:"core"`
+    Accelerators []*Accelerator `json:"accelerators"`
+}
+
 type Aggregate string

 const (
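The nested int slices encode ID lists per hardware unit: Socket[i] holds the hardware-thread IDs belonging to socket i, and likewise for memory domains, dies, and cores. A small illustrative value, not taken from a real cluster config:

topo := model.Topology{
    Node:   []int{0, 1, 2, 3},
    Socket: [][]int{{0, 1}, {2, 3}}, // two sockets, two hwthreads each
}
threadsOnSocket0 := topo.Socket[0] // [0 1]
_ = threadsOnSocket0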
@@ -198,55 +209,6 @@ func (e Aggregate) MarshalGQL(w io.Writer) {
     fmt.Fprint(w, strconv.Quote(e.String()))
 }

-type JobState string
-
-const (
-    JobStateRunning   JobState = "running"
-    JobStateCompleted JobState = "completed"
-    JobStateFailed    JobState = "failed"
-    JobStateCanceled  JobState = "canceled"
-    JobStateStopped   JobState = "stopped"
-    JobStateTimeout   JobState = "timeout"
-)
-
-var AllJobState = []JobState{
-    JobStateRunning,
-    JobStateCompleted,
-    JobStateFailed,
-    JobStateCanceled,
-    JobStateStopped,
-    JobStateTimeout,
-}
-
-func (e JobState) IsValid() bool {
-    switch e {
-    case JobStateRunning, JobStateCompleted, JobStateFailed, JobStateCanceled, JobStateStopped, JobStateTimeout:
-        return true
-    }
-    return false
-}
-
-func (e JobState) String() string {
-    return string(e)
-}
-
-func (e *JobState) UnmarshalGQL(v interface{}) error {
-    str, ok := v.(string)
-    if !ok {
-        return fmt.Errorf("enums must be strings")
-    }
-
-    *e = JobState(str)
-    if !e.IsValid() {
-        return fmt.Errorf("%s is not a valid JobState", str)
-    }
-    return nil
-}
-
-func (e JobState) MarshalGQL(w io.Writer) {
-    fmt.Fprint(w, strconv.Quote(e.String()))
-}
-
 type SortDirectionEnum string

 const (
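The JobState enum and its helpers are deleted here because the type moves into the schema package and is exposed to gqlgen as the JobState scalar (see the schema diff below). The gqlgen contract itself is unchanged: any bound type implements UnmarshalGQL and MarshalGQL. A minimal sketch of that contract for a string-backed type, assuming imports of fmt, io, and strconv; the type name is illustrative:

type ExampleState string // illustrative name, not from this commit

func (e *ExampleState) UnmarshalGQL(v interface{}) error {
    // gqlgen hands the raw GraphQL value in; enums arrive as strings.
    str, ok := v.(string)
    if !ok {
        return fmt.Errorf("enums must be strings")
    }
    *e = ExampleState(str)
    return nil
}

func (e ExampleState) MarshalGQL(w io.Writer) {
    // Write the value back out as a quoted JSON string.
    fmt.Fprint(w, strconv.Quote(string(e)))
}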
@@ -2,15 +2,14 @@ package graph

 import (
     "context"
-    "encoding/json"
     "errors"
     "fmt"
     "regexp"
     "strings"
     "time"

    "github.com/ClusterCockpit/cc-jobarchive/auth"
    "github.com/ClusterCockpit/cc-jobarchive/graph/model"
    "github.com/ClusterCockpit/cc-jobarchive/schema"
    sq "github.com/Masterminds/squirrel"
    "github.com/jmoiron/sqlx"
 )
@@ -23,44 +22,9 @@ type Resolver struct {
     DB *sqlx.DB
 }

-var JobTableCols []string = []string{
-    "id", "job_id", "cluster", "start_time",
-    "user", "project", "partition", "array_job_id", "duration", "job_state", "resources",
-    "num_nodes", "num_hwthreads", "num_acc", "smt", "exclusive", "monitoring_status",
-    "load_avg", "mem_used_max", "flops_any_avg", "mem_bw_avg", "net_bw_avg", "file_bw_avg",
-}
-
-type Scannable interface {
-    Scan(dest ...interface{}) error
-}
-
-// Helper function for scanning jobs with the `jobTableCols` columns selected.
-func ScanJob(row Scannable) (*model.Job, error) {
-    job := &model.Job{}
-
-    var rawResources []byte
-    if err := row.Scan(
-        &job.ID, &job.JobID, &job.Cluster, &job.StartTime,
-        &job.User, &job.Project, &job.Partition, &job.ArrayJobID, &job.Duration, &job.State, &rawResources,
-        &job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Smt, &job.Exclusive, &job.MonitoringStatus,
-        &job.LoadAvg, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.NetBwAvg, &job.FileBwAvg); err != nil {
-        return nil, err
-    }
-
-    if err := json.Unmarshal(rawResources, &job.Resources); err != nil {
-        return nil, err
-    }
-
-    if job.Duration == 0 && job.State == model.JobStateRunning {
-        job.Duration = int(time.Since(job.StartTime).Seconds())
-    }
-
-    return job, nil
-}
-
 // Helper function for the `jobs` GraphQL-Query. Is also used elsewhere when a list of jobs is needed.
-func (r *Resolver) queryJobs(ctx context.Context, filters []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) ([]*model.Job, int, error) {
-    query := sq.Select(JobTableCols...).From("job")
+func (r *Resolver) queryJobs(ctx context.Context, filters []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) ([]*schema.Job, int, error) {
+    query := sq.Select(schema.JobColumns...).From("job")
     query = securityCheck(ctx, query)

     if order != nil {
@@ -85,33 +49,32 @@ func (r *Resolver) queryJobs(ctx context.Context, filters []*model.JobFilter, pa
         query = buildWhereClause(f, query)
     }

-    rows, err := query.RunWith(r.DB).Query()
+    sql, args, err := query.ToSql()
     if err != nil {
         return nil, 0, err
     }
-    defer rows.Close()

-    jobs := make([]*model.Job, 0, 50)
+    rows, err := r.DB.Queryx(sql, args...)
+    if err != nil {
+        return nil, 0, err
+    }
+
+    jobs := make([]*schema.Job, 0, 50)
     for rows.Next() {
-        job, err := ScanJob(rows)
+        job, err := schema.ScanJob(rows)
         if err != nil {
             return nil, 0, err
         }
         jobs = append(jobs, job)
     }

     // count all jobs:
     query = sq.Select("count(*)").From("job")
     for _, f := range filters {
         query = buildWhereClause(f, query)
     }
-    rows, err = query.RunWith(r.DB).Query()
-    if err != nil {
-        return nil, 0, err
-    }
-    defer rows.Close()
     var count int
-    rows.Next()
-    if err := rows.Scan(&count); err != nil {
+    if err := query.RunWith(r.DB).Scan(&count); err != nil {
         return nil, 0, err
     }

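The switch from query.RunWith(r.DB).Query() to ToSql() plus r.DB.Queryx() trades squirrel's execution shortcut for sqlx's richer row type: ToSql() renders the builder into a SQL string plus its bound arguments, and Queryx returns *sqlx.Rows, which the new schema.ScanJob can consume. A condensed sketch of the pattern, assuming a *sqlx.DB handle named db:

query := sq.Select(schema.JobColumns...).From("job")

// Render the builder into ("SELECT ... FROM job", []interface{}{...}).
sql, args, err := query.ToSql()
if err != nil {
    return nil, err
}

// sqlx rows additionally support StructScan and friends.
rows, err := db.Queryx(sql, args...)
if err != nil {
    return nil, err
}
defer rows.Close()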
@@ -132,7 +95,7 @@ func securityCheck(ctx context.Context, query sq.SelectBuilder) sq.SelectBuilder
     return query.Where("job.user_id = ?", user.Username)
 }

-// Build a sq.SelectBuilder out of a model.JobFilter.
+// Build a sq.SelectBuilder out of a schema.JobFilter.
 func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
     if filter.Tags != nil {
         query = query.Join("jobtag ON jobtag.job_id = job.id").Where("jobtag.tag_id IN ?", filter.Tags)
@@ -155,8 +118,8 @@ func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
     if filter.Duration != nil {
         query = buildIntCondition("job.duration", filter.Duration, query)
     }
-    if filter.JobState != nil {
-        query = query.Where("job.job_state IN ?", filter.JobState)
+    if filter.State != nil {
+        query = query.Where("job.job_state IN ?", filter.State)
     }
     if filter.NumNodes != nil {
         query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
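buildIntCondition is referenced here but its body lies outside this hunk; a minimal sketch of what such a helper typically looks like with squirrel, under the assumption that model.IntRange carries inclusive From/To bounds:

func buildIntCondition(field string, cond *model.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
    // Append "<field> BETWEEN ? AND ?" with the range bounds as arguments.
    return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}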
@@ -1,107 +1,122 @@
+scalar Time
+scalar NullableFloat
+scalar MetricScope
+scalar JobState
+
 type Job {
-    Id:               ID!     # Database ID, unique
-    JobId:            Int!    # ID given to the job by the cluster scheduler
-    User:             String! # Username
-    Project:          String! # Project
-    Cluster:          String! # Name of the cluster this job was running on
-    StartTime:        Time!   # RFC3339 formated string
-    Duration:         Int!    # For running jobs, the time it has already run
-    NumNodes:         Int!    # Number of nodes this job was running on
-    NumHWThreads:     Int!
-    NumAcc:           Int!
+    id:           ID!
+    jobId:        Int!
+    user:         String!
+    project:      String!
+    cluster:      String!
+    startTime:    Time!
+    duration:     Int!
+    numNodes:     Int!
+    numHWThreads: Int!
+    numAcc:       Int!
     SMT:          Int!
-    Exclusive:        Int!
-    Partition:        String!
-    ArrayJobId:       Int!
-    MonitoringStatus: Int!
-    State:            JobState!       # State of the job
-    Tags:             [JobTag!]!      # List of tags this job has
-    Resources:        [JobResource!]! # List of hosts/hwthreads/gpus/...
-
-    # Will be null for running jobs.
-    LoadAvg:     Float
-    MemUsedMax:  Float
-    FlopsAnyAvg: Float
-    MemBwAvg:    Float
-    NetBwAvg:    Float
-    FileBwAvg:   Float
-}
-
-type JobResource {
-    Hostname:     String!
-    HWThreads:    [Int!]
-    Accelerators: [Accelerator!]
-}
-
-type Accelerator {
-    Id:    String!
-    Type:  String!
-    Model: String!
-}
-
-# TODO: Extend by more possible states?
-enum JobState {
-    running
-    completed
-    failed
-    canceled
-    stopped
-    timeout
-}
-
-type JobTag {
-    Id:      ID!     # Database ID, unique
-    TagType: String! # Type
-    TagName: String! # Name
+    exclusive:        Int!
+    partition:        String!
+    arrayJobId:       Int!
+    monitoringStatus: Int!
+    state:            JobState!
+    tags:             [Tag!]!
+    resources:        [JobResource!]!
 }

 type Cluster {
-    ClusterID:       String!
-    ProcessorType:   String!
-    SocketsPerNode:  Int!
-    CoresPerSocket:  Int!
-    ThreadsPerCore:  Int!
-    FlopRateScalar:  Int!
-    FlopRateSimd:    Int!
-    MemoryBandwidth: Int!
-    MetricConfig:    [MetricConfig!]!
-    FilterRanges:    FilterRanges!
+    name:         String!
+    metricConfig: [MetricConfig!]!
+    filterRanges: FilterRanges!
+    partitions:   [Partition!]!
 }

+type Partition {
+    name:            String!
+    processorType:   String!
+    socketsPerNode:  Int!
+    coresPerSocket:  Int!
+    threadsPerCore:  Int!
+    flopRateScalar:  Int!
+    flopRateSimd:    Int!
+    memoryBandwidth: Int!
+    topology:        Topology!
+}
+
+type Topology {
+    node:         [Int!]
+    socket:       [[Int!]!]
+    memoryDomain: [[Int!]!]
+    die:          [[Int!]!]
+    core:         [[Int!]!]
+    accelerators: [Accelerator!]
+}
+
+type Accelerator {
+    id:    String!
+    type:  String!
+    model: String!
+}
+
 type MetricConfig {
-    Name:     String!
-    Unit:     String!
-    Timestep: Int!
-    Peak:     Int!
-    Normal:   Int!
-    Caution:  Int!
-    Alert:    Int!
-    Scope:    String!
+    name:     String!
+    unit:     String!
+    scope:    String!
+    timestep: Int!
+    Peak:     Float!
+    Normal:   Float!
+    Caution:  Float!
+    Alert:    Float!
 }

-type JobMetric {
-    Unit:     String!
-    Scope:    JobMetricScope!
-    Timestep: Int!
-    Series:   [JobMetricSeries!]!
+type Tag {
+    id:   ID!
+    type: String!
+    name: String!
 }

-type JobMetricSeries {
-    Hostname:   String!
-    Id:         Int
-    Statistics: JobMetricStatistics
-    Data:       [NullableFloat!]!
-}
-
-type JobMetricStatistics {
-    Avg: Float!
-    Min: Float!
-    Max: Float!
+type JobResource {
+    hostname:      String!
+    hwthreads:     [Int!]
+    accelerators:  [Int!]
+    configuration: String
 }

 type JobMetricWithName {
-    name:   String!
-    metric: JobMetric!
+    name: String!
+
+    node:         JobMetric
+    socket:       JobMetric
+    memoryDomain: JobMetric
+    core:         JobMetric
+    hwthread:     JobMetric
 }

+type JobMetric {
+    unit:             String!
+    scope:            MetricScope!
+    timestep:         Int!
+    series:           [Series!]!
+    statisticsSeries: [StatsSeries!]
+}
+
+type Series {
+    hostname:   String!
+    id:         Int
+    statistics: MetricStatistics
+    data:       [NullableFloat!]!
+}
+
+type MetricStatistics {
+    avg: Float!
+    min: Float!
+    max: Float!
+}
+
+type StatsSeries {
+    mean: [NullableFloat!]
+    min:  [NullableFloat!]
+    max:  [NullableFloat!]
+}
+
 type MetricFootprints {
@@ -123,7 +138,7 @@ type NodeMetrics {

 type Query {
     clusters: [Cluster!]!   # List of all clusters
-    tags:     [JobTag!]!    # List of all tags
+    tags:     [Tag!]!       # List of all tags

     job(id: ID!): Job
     jobMetrics(id: ID!, metrics: [String!]): [JobMetricWithName!]!
@@ -138,23 +153,16 @@ type Query {
 }

 type Mutation {
-    createTag(type: String!, name: String!): JobTag!
+    createTag(type: String!, name: String!): Tag!
     deleteTag(id: ID!): ID!
-    addTagsToJob(job: ID!, tagIds: [ID!]!): [JobTag!]!
-    removeTagsFromJob(job: ID!, tagIds: [ID!]!): [JobTag!]!
+    addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
+    removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!

     updateConfiguration(name: String!, value: String!): String
 }

-type IntRangeOutput {
-    from: Int!
-    to:   Int!
-}
-
-type TimeRangeOutput {
-    from: Time!
-    to:   Time!
-}
+type IntRangeOutput { from: Int!, to: Int! }
+type TimeRangeOutput { from: Time!, to: Time! }

 type FilterRanges {
     duration: IntRangeOutput!
@@ -171,7 +179,7 @@ input JobFilter {
     duration:    IntRange
     numNodes:    IntRange
     startTime:   TimeRange
-    jobState:    [JobState!]
+    state:       [JobState!]
     flopsAnyAvg: FloatRange
     memBwAvg:    FloatRange
     loadAvg:     FloatRange
@@ -195,20 +203,9 @@ input StringInput {
     endsWith:   String
 }

-input IntRange {
-    from: Int!
-    to:   Int!
-}
-
-input FloatRange {
-    from: Float!
-    to:   Float!
-}
-
-input TimeRange {
-    from: Time
-    to:   Time
-}
+input IntRange { from: Int!, to: Int! }
+input FloatRange { from: Float!, to: Float! }
+input TimeRange { from: Time, to: Time }

 type JobResultList {
     items: [Job!]!
@@ -236,7 +233,3 @@ input PageRequest {
     itemsPerPage: Int!
     page:         Int!
 }
-
-scalar Time
-scalar NullableFloat
-scalar JobMetricScope
@@ -19,36 +19,35 @@ import (
    sq "github.com/Masterminds/squirrel"
 )

-func (r *jobResolver) Tags(ctx context.Context, obj *model.Job) ([]*model.JobTag, error) {
+func (r *acceleratorResolver) ID(ctx context.Context, obj *schema.Accelerator) (string, error) {
+    panic(fmt.Errorf("not implemented"))
+}
+
+func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
     query := sq.
         Select("tag.id", "tag.tag_type", "tag.tag_name").
         From("tag").
         Join("jobtag ON jobtag.tag_id = tag.id").
         Where("jobtag.job_id = ?", obj.ID)

-    rows, err := query.RunWith(r.DB).Query()
+    sql, args, err := query.ToSql()
     if err != nil {
         return nil, err
     }
-    defer rows.Close()

-    tags := make([]*model.JobTag, 0)
-    for rows.Next() {
-        var tag model.JobTag
-        if err := rows.Scan(&tag.ID, &tag.TagType, &tag.TagName); err != nil {
-            return nil, err
-        }
-        tags = append(tags, &tag)
+    tags := make([]*schema.Tag, 0)
+    if err := r.DB.Select(&tags, sql, args...); err != nil {
+        return nil, err
     }

     return tags, nil
 }

-func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*model.JobTag, error) {
+func (r *jobResolver) Resources(ctx context.Context, obj *schema.Job) ([]*model.JobResource, error) {
+    panic(fmt.Errorf("not implemented"))
+}
+
+func (r *jobMetricResolver) StatisticsSeries(ctx context.Context, obj *schema.JobMetric) ([]*schema.StatsSeries, error) {
+    panic(fmt.Errorf("not implemented"))
+}
+
+func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
     res, err := r.DB.Exec("INSERT INTO tag (tag_type, tag_name) VALUES ($1, $2)", typeArg, name)
     if err != nil {
         return nil, err
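r.DB.Select replaces the manual rows.Next/Scan loop: sqlx maps the selected columns onto struct fields via db tags, which presumes schema.Tag carries tags matching the tag table. The real definition lives in the schema package, outside this diff; a sketch of the assumed shape and usage:

// Assumed shape of schema.Tag; not shown in this commit.
type Tag struct {
    ID   int64  `json:"id" db:"id"`
    Type string `json:"type" db:"tag_type"`
    Name string `json:"name" db:"tag_name"`
}

// sqlx then fills a slice directly, one struct per row.
tags := make([]*Tag, 0)
err := db.Select(&tags, "SELECT id, tag_type, tag_name FROM tag")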
@@ -59,7 +58,7 @@ func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name s
         return nil, err
     }

-    return &model.JobTag{ID: strconv.FormatInt(id, 10), TagType: typeArg, TagName: name}, nil
+    return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
 }

 func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
@@ -67,7 +66,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
     panic(fmt.Errorf("not implemented"))
 }

-func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*model.JobTag, error) {
+func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
     jid, err := strconv.Atoi(job)
     if err != nil {
         return nil, err
@@ -84,7 +83,9 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
         }
     }

-    tags, err := r.Job().Tags(ctx, &model.Job{ID: job})
+    dummyJob := schema.Job{}
+    dummyJob.ID = int64(jid)
+    tags, err := r.Job().Tags(ctx, &dummyJob)
     if err != nil {
         return nil, err
     }
@@ -97,7 +98,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
     return tags, metricdata.UpdateTags(jobObj, tags)
 }

-func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*model.JobTag, error) {
+func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
     jid, err := strconv.Atoi(job)
     if err != nil {
         return nil, err
@@ -114,7 +115,9 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
         }
     }

-    tags, err := r.Job().Tags(ctx, &model.Job{ID: job})
+    dummyJob := schema.Job{}
+    dummyJob.ID = int64(jid)
+    tags, err := r.Job().Tags(ctx, &dummyJob)
     if err != nil {
         return nil, err
     }
@@ -139,29 +142,28 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*model.Cluster, error)
     return config.Clusters, nil
 }

-func (r *queryResolver) Tags(ctx context.Context) ([]*model.JobTag, error) {
-    rows, err := sq.Select("id", "tag_type", "tag_name").From("tag").RunWith(r.DB).Query()
+func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
+    sql, args, err := sq.Select("id", "tag_type", "tag_name").From("tag").ToSql()
     if err != nil {
         return nil, err
     }
-    defer rows.Close()

-    tags := make([]*model.JobTag, 0)
-    for rows.Next() {
-        var tag model.JobTag
-        if err := rows.Scan(&tag.ID, &tag.TagType, &tag.TagName); err != nil {
-            return nil, err
-        }
-        tags = append(tags, &tag)
+    tags := make([]*schema.Tag, 0)
+    if err := r.DB.Select(&tags, sql, args...); err != nil {
+        return nil, err
     }

     return tags, nil
 }

-func (r *queryResolver) Job(ctx context.Context, id string) (*model.Job, error) {
-    query := sq.Select(JobTableCols...).From("job").Where("job.id = ?", id)
+func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
+    query := sq.Select(schema.JobColumns...).From("job").Where("job.id = ?", id)
     query = securityCheck(ctx, query)
-    return ScanJob(query.RunWith(r.DB).QueryRow())
+
+    sql, args, err := query.ToSql()
+    if err != nil {
+        return nil, err
+    }
+
+    return schema.ScanJob(r.DB.QueryRowx(sql, args...))
 }

 func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string) ([]*model.JobMetricWithName, error) {
@@ -178,8 +180,12 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
     res := []*model.JobMetricWithName{}
     for name, md := range data {
         res = append(res, &model.JobMetricWithName{
-            Name:   name,
-            Metric: md,
+            Name:         name,
+            Node:         md["node"],
+            Socket:       md["socket"],
+            MemoryDomain: md["memoryDomain"],
+            Core:         md["core"],
+            Hwthread:     md["hwthread"],
         })
     }

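The loop now presumes metricdata returns a two-level map, metric name to scope to *schema.JobMetric, so each scope can be projected onto its own field of JobMetricWithName; scopes absent from the map yield nil, matching the nullable node/socket/... fields in the new schema. A sketch of the assumed shape:

// Assumed shape of `data` in the loop above (sketch):
var data map[string]map[string]*schema.JobMetric

md := data["flops_any"] // per-scope metrics for one metric
nodeLevel := md["node"] // nil if the metric was not measured at node scope
_ = nodeLevel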
@@ -237,19 +243,19 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
     return res, nil
 }

+// Accelerator returns generated.AcceleratorResolver implementation.
+func (r *Resolver) Accelerator() generated.AcceleratorResolver { return &acceleratorResolver{r} }
+
 // Job returns generated.JobResolver implementation.
 func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }

+// JobMetric returns generated.JobMetricResolver implementation.
+func (r *Resolver) JobMetric() generated.JobMetricResolver { return &jobMetricResolver{r} }
+
 // Mutation returns generated.MutationResolver implementation.
 func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }

 // Query returns generated.QueryResolver implementation.
 func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }

+type acceleratorResolver struct{ *Resolver }
 type jobResolver struct{ *Resolver }
+type jobMetricResolver struct{ *Resolver }
 type mutationResolver struct{ *Resolver }
 type queryResolver struct{ *Resolver }
graph/stats.go
@@ -3,6 +3,7 @@ package graph

 import (
     "context"
     "database/sql"
+    "errors"
     "fmt"
     "math"

@@ -16,9 +17,9 @@ import (

 // GraphQL validation should make sure that no unkown values can be specified.
 var groupBy2column = map[model.Aggregate]string{
-    model.AggregateUser:    "job.user_id",
-    model.AggregateProject: "job.project_id",
-    model.AggregateCluster: "job.cluster_id",
+    model.AggregateUser:    "job.user",
+    model.AggregateProject: "job.project",
+    model.AggregateCluster: "job.cluster",
 }

 // Helper function for the jobsStatistics GraphQL query placed here so that schema.resolvers.go is not too full.
@@ -28,53 +29,59 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF

     // `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
     for _, cluster := range config.Clusters {
-        corehoursCol := fmt.Sprintf("SUM(job.duration * job.num_nodes * %d * %d) / 3600", cluster.SocketsPerNode, cluster.CoresPerSocket)
-        var query sq.SelectBuilder
-        if groupBy == nil {
-            query = sq.Select(
-                "''",
-                "COUNT(job.id)",
-                "SUM(job.duration) / 3600",
-                corehoursCol,
-            ).From("job").Where("job.cluster_id = ?", cluster.ClusterID)
-        } else {
-            col := groupBy2column[*groupBy]
-            query = sq.Select(
-                col,
-                "COUNT(job.id)",
-                "SUM(job.duration) / 3600",
-                corehoursCol,
-            ).From("job").Where("job.cluster_id = ?", cluster.ClusterID).GroupBy(col)
-        }
+        for _, partition := range cluster.Partitions {
+            corehoursCol := fmt.Sprintf("SUM(job.duration * job.num_nodes * %d * %d) / 3600", partition.SocketsPerNode, partition.CoresPerSocket)
+            var query sq.SelectBuilder
+            if groupBy == nil {
+                query = sq.Select(
+                    "''",
+                    "COUNT(job.id)",
+                    "SUM(job.duration) / 3600",
+                    corehoursCol,
+                ).From("job")
+            } else {
+                col := groupBy2column[*groupBy]
+                query = sq.Select(
+                    col,
+                    "COUNT(job.id)",
+                    "SUM(job.duration) / 3600",
+                    corehoursCol,
+                ).From("job").GroupBy(col)
+            }
+
+            query = query.
+                Where("job.cluster = ?", cluster.Name).
+                Where("job.partition = ?", partition.Name)

-        query = securityCheck(ctx, query)
-        for _, f := range filter {
-            query = buildWhereClause(f, query)
-        }
+            query = securityCheck(ctx, query)
+            for _, f := range filter {
+                query = buildWhereClause(f, query)
+            }

-        rows, err := query.RunWith(r.DB).Query()
-        if err != nil {
-            return nil, err
-        }
+            rows, err := query.RunWith(r.DB).Query()
+            if err != nil {
+                return nil, err
+            }

-        for rows.Next() {
-            var id sql.NullString
-            var jobs, walltime, corehours sql.NullInt64
-            if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
-                return nil, err
-            }
-
-            if id.Valid {
-                if s, ok := stats[id.String]; ok {
-                    s.TotalJobs += int(jobs.Int64)
-                    s.TotalWalltime += int(walltime.Int64)
-                    s.TotalCoreHours += int(corehours.Int64)
-                } else {
-                    stats[id.String] = &model.JobsStatistics{
-                        ID:             id.String,
-                        TotalJobs:      int(jobs.Int64),
-                        TotalWalltime:  int(walltime.Int64),
-                        TotalCoreHours: int(corehours.Int64),
+            for rows.Next() {
+                var id sql.NullString
+                var jobs, walltime, corehours sql.NullInt64
+                if err := rows.Scan(&id, &jobs, &walltime, &corehours); err != nil {
+                    return nil, err
+                }
+
+                if id.Valid {
+                    if s, ok := stats[id.String]; ok {
+                        s.TotalJobs += int(jobs.Int64)
+                        s.TotalWalltime += int(walltime.Int64)
+                        s.TotalCoreHours += int(corehours.Int64)
+                    } else {
+                        stats[id.String] = &model.JobsStatistics{
+                            ID:             id.String,
+                            TotalJobs:      int(jobs.Int64),
+                            TotalWalltime:  int(walltime.Int64),
+                            TotalCoreHours: int(corehours.Int64),
+                        }
+                    }
+                }
             }
         }
     }
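The corehoursCol expression folds the per-partition geometry into SQL. Restated in Go for clarity, with a worked example: a 2-hour job (7200 s) on 4 nodes with 2 sockets per node and 16 cores per socket accrues 7200 * 4 * 2 * 16 / 3600 = 256 core-hours.

// Go restatement of the SQL core-hours expression above (sketch).
func coreHours(durationSec, numNodes, socketsPerNode, coresPerSocket int) int {
    return durationSec * numNodes * socketsPerNode * coresPerSocket / 3600
}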
@@ -204,9 +211,16 @@ func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilte
             return nil, err
         }

-        flops, membw := jobdata["flops_any"], jobdata["mem_bw"]
-        if flops == nil && membw == nil {
-            return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %s", job.ID)
+        flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
+        if flops_ == nil && membw_ == nil {
+            return nil, fmt.Errorf("'flops_any' or 'mem_bw' missing for job %d", job.ID)
+        }
+
+        flops, ok1 := flops_["node"]
+        membw, ok2 := membw_["node"]
+        if !ok1 || !ok2 {
+            // TODO/FIXME:
+            return nil, errors.New("todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
         }

         for n := 0; n < len(flops.Series); n++ {
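The renamed flops_/membw_ intermediates exist because jobdata is now keyed first by metric and then by scope; the second lookup can fail independently of the first, hence the separate ok1/ok2 checks. A sketch of the safe two-step lookup, assuming the same map shape as in JobMetrics above:

// First level: is the metric present at all?
perScope, ok := jobdata["flops_any"]
if !ok {
    // metric missing entirely
}
// Second level: is it available at node scope?
nodeMetric, ok := perScope["node"]
if !ok {
    // metric exists, but not at node scope
}
_ = nodeMetric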