BC: new schemas for basically everything

Author: Lou Knauer
Date:   2021-12-16 13:17:48 +01:00
Parent: 7fcc39a144
Commit: 89333666b3

14 changed files with 1631 additions and 549 deletions

File diff suppressed because it is too large


@@ -38,36 +38,42 @@ type IntRangeOutput struct {
 }
 
 type Job struct {
-	ID          string    `json:"id"`
-	JobID       string    `json:"jobId"`
-	UserID      string    `json:"userId"`
-	ProjectID   string    `json:"projectId"`
-	ClusterID   string    `json:"clusterId"`
-	StartTime   time.Time `json:"startTime"`
-	Duration    int       `json:"duration"`
-	NumNodes    int       `json:"numNodes"`
-	Nodes       []string  `json:"nodes"`
-	HasProfile  bool      `json:"hasProfile"`
-	State       JobState  `json:"state"`
-	Tags        []*JobTag `json:"tags"`
-	LoadAvg     *float64  `json:"loadAvg"`
-	MemUsedMax  *float64  `json:"memUsedMax"`
-	FlopsAnyAvg *float64  `json:"flopsAnyAvg"`
-	MemBwAvg    *float64  `json:"memBwAvg"`
-	NetBwAvg    *float64  `json:"netBwAvg"`
-	FileBwAvg   *float64  `json:"fileBwAvg"`
+	ID               string                `json:"Id"`
+	JobID            int                   `json:"JobId"`
+	User             string                `json:"User"`
+	Project          string                `json:"Project"`
+	Cluster          string                `json:"Cluster"`
+	StartTime        time.Time             `json:"StartTime"`
+	Duration         int                   `json:"Duration"`
+	NumNodes         int                   `json:"NumNodes"`
+	NumHWThreads     int                   `json:"NumHWThreads"`
+	NumAcc           int                   `json:"NumAcc"`
+	Smt              int                   `json:"SMT"`
+	Exclusive        int                   `json:"Exclusive"`
+	Partition        string                `json:"Partition"`
+	ArrayJobID       int                   `json:"ArrayJobId"`
+	MonitoringStatus int                   `json:"MonitoringStatus"`
+	State            JobState              `json:"State"`
+	Tags             []*JobTag             `json:"Tags"`
+	Resources        []*schema.JobResource `json:"Resources"`
+	LoadAvg          *float64              `json:"LoadAvg"`
+	MemUsedMax       *float64              `json:"MemUsedMax"`
+	FlopsAnyAvg      *float64              `json:"FlopsAnyAvg"`
+	MemBwAvg         *float64              `json:"MemBwAvg"`
+	NetBwAvg         *float64              `json:"NetBwAvg"`
+	FileBwAvg        *float64              `json:"FileBwAvg"`
 }
 
 type JobFilter struct {
 	Tags        []string     `json:"tags"`
 	JobID       *StringInput `json:"jobId"`
-	UserID      *StringInput `json:"userId"`
-	ProjectID   *StringInput `json:"projectId"`
-	ClusterID   *StringInput `json:"clusterId"`
+	User        *StringInput `json:"user"`
+	Project     *StringInput `json:"project"`
+	Cluster     *StringInput `json:"cluster"`
 	Duration    *IntRange    `json:"duration"`
 	NumNodes    *IntRange    `json:"numNodes"`
 	StartTime   *TimeRange   `json:"startTime"`
-	IsRunning   *bool        `json:"isRunning"`
+	JobState    []JobState   `json:"jobState"`
 	FlopsAnyAvg *FloatRange  `json:"flopsAnyAvg"`
 	MemBwAvg    *FloatRange  `json:"memBwAvg"`
 	LoadAvg     *FloatRange  `json:"loadAvg"`
@@ -97,13 +103,14 @@ type JobsStatistics struct {
 }
 
 type MetricConfig struct {
-	Name       string `json:"name"`
-	Unit       string `json:"unit"`
-	Sampletime int    `json:"sampletime"`
-	Peak       int    `json:"peak"`
-	Normal     int    `json:"normal"`
-	Caution    int    `json:"caution"`
-	Alert      int    `json:"alert"`
+	Name     string `json:"Name"`
+	Unit     string `json:"Unit"`
+	Timestep int    `json:"Timestep"`
+	Peak     int    `json:"Peak"`
+	Normal   int    `json:"Normal"`
+	Caution  int    `json:"Caution"`
+	Alert    int    `json:"Alert"`
+	Scope    string `json:"Scope"`
 }
 
 type MetricFootprints struct {
@@ -196,16 +203,24 @@ type JobState string
 const (
 	JobStateRunning   JobState = "running"
 	JobStateCompleted JobState = "completed"
+	JobStateFailed    JobState = "failed"
+	JobStateCanceled  JobState = "canceled"
+	JobStateStopped   JobState = "stopped"
+	JobStateTimeout   JobState = "timeout"
 )
 
 var AllJobState = []JobState{
 	JobStateRunning,
 	JobStateCompleted,
+	JobStateFailed,
+	JobStateCanceled,
+	JobStateStopped,
+	JobStateTimeout,
 }
 
 func (e JobState) IsValid() bool {
 	switch e {
-	case JobStateRunning, JobStateCompleted:
+	case JobStateRunning, JobStateCompleted, JobStateFailed, JobStateCanceled, JobStateStopped, JobStateTimeout:
 		return true
 	}
 	return false
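
The renames above are the breaking change the commit title announces: every JSON tag moves from camelCase to PascalCase, and JobID additionally changes type from string to int, so existing API clients will stop seeing these fields. A minimal sketch of the break (not part of the commit; the two-field structs are hypothetical excerpts of the model):

package main

import (
	"encoding/json"
	"fmt"
)

// Hypothetical excerpts of the old and new model, for illustration only.
type OldJob struct {
	JobID string `json:"jobId"`
	User  string `json:"userId"`
}

type NewJob struct {
	JobID int    `json:"JobId"` // key and type both changed
	User  string `json:"User"`
}

func main() {
	before, _ := json.Marshal(OldJob{JobID: "1337", User: "lou"})
	after, _ := json.Marshal(NewJob{JobID: 1337, User: "lou"})
	fmt.Println(string(before)) // {"jobId":"1337","userId":"lou"}
	fmt.Println(string(after))  // {"JobId":1337,"User":"lou"}
}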


@@ -2,6 +2,7 @@ package graph
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"regexp"
@@ -22,7 +23,12 @@ type Resolver struct {
 	DB *sqlx.DB
 }
 
-var JobTableCols []string = []string{"id", "job_id", "user_id", "project_id", "cluster_id", "start_time", "duration", "job_state", "num_nodes", "node_list", "flops_any_avg", "mem_bw_avg", "net_bw_avg", "file_bw_avg", "load_avg"}
+var JobTableCols []string = []string{
+	"id", "job_id", "cluster", "start_time",
+	"user", "project", "partition", "array_job_id", "duration", "job_state", "resources",
+	"num_nodes", "num_hwthreads", "num_acc", "smt", "exclusive", "monitoring_status",
+	"load_avg", "mem_used_max", "flops_any_avg", "mem_bw_avg", "net_bw_avg", "file_bw_avg",
+}
 
 type Scannable interface {
 	Scan(dest ...interface{}) error
@@ -30,13 +36,18 @@ type Scannable interface {
 }
 
 // Helper function for scanning jobs with the `jobTableCols` columns selected.
 func ScanJob(row Scannable) (*model.Job, error) {
-	job := &model.Job{HasProfile: true}
+	job := &model.Job{}
 
-	var nodeList string
+	var rawResources []byte
 	if err := row.Scan(
-		&job.ID, &job.JobID, &job.UserID, &job.ProjectID, &job.ClusterID,
-		&job.StartTime, &job.Duration, &job.State, &job.NumNodes, &nodeList,
-		&job.FlopsAnyAvg, &job.MemBwAvg, &job.NetBwAvg, &job.FileBwAvg, &job.LoadAvg); err != nil {
+		&job.ID, &job.JobID, &job.Cluster, &job.StartTime,
+		&job.User, &job.Project, &job.Partition, &job.ArrayJobID, &job.Duration, &job.State, &rawResources,
+		&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Smt, &job.Exclusive, &job.MonitoringStatus,
+		&job.LoadAvg, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.NetBwAvg, &job.FileBwAvg); err != nil {
 		return nil, err
 	}
+
+	if err := json.Unmarshal(rawResources, &job.Resources); err != nil {
+		return nil, err
+	}
@@ -44,7 +55,6 @@ func ScanJob(row Scannable) (*model.Job, error) {
 		job.Duration = int(time.Since(job.StartTime).Seconds())
 	}
 
-	job.Nodes = strings.Split(nodeList, ",")
 	return job, nil
 }
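
ScanJob now reads the resources column as raw JSON instead of splitting a comma-separated node_list. A rough sketch of that decode step in isolation; the Go shape of schema.JobResource is assumed here from the JobResource GraphQL type added further below, since the schema package itself is not part of this diff:

package main

import (
	"encoding/json"
	"fmt"
)

// Assumed shapes, mirroring the new GraphQL JobResource/Accelerator types;
// the real definitions live in the schema package, which this diff omits.
type Accelerator struct {
	ID    string `json:"Id"`
	Type  string `json:"Type"`
	Model string `json:"Model"`
}

type JobResource struct {
	Hostname     string         `json:"Hostname"`
	HWThreads    []int          `json:"HWThreads,omitempty"`
	Accelerators []*Accelerator `json:"Accelerators,omitempty"`
}

func main() {
	// Example of what a row's resources column could contain.
	raw := []byte(`[{"Hostname":"node001","HWThreads":[0,1,2,3]}]`)

	var resources []*JobResource
	if err := json.Unmarshal(raw, &resources); err != nil {
		panic(err)
	}
	fmt.Println(resources[0].Hostname, resources[0].HWThreads) // node001 [0 1 2 3]
}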
@@ -130,14 +140,14 @@ func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
 	if filter.JobID != nil {
 		query = buildStringCondition("job.job_id", filter.JobID, query)
 	}
-	if filter.UserID != nil {
-		query = buildStringCondition("job.user_id", filter.UserID, query)
+	if filter.User != nil {
+		query = buildStringCondition("job.user", filter.User, query)
 	}
-	if filter.ProjectID != nil {
-		query = buildStringCondition("job.project_id", filter.ProjectID, query)
+	if filter.Project != nil {
+		query = buildStringCondition("job.project", filter.Project, query)
 	}
-	if filter.ClusterID != nil {
-		query = buildStringCondition("job.cluster_id", filter.ClusterID, query)
+	if filter.Cluster != nil {
+		query = buildStringCondition("job.cluster", filter.Cluster, query)
 	}
 	if filter.StartTime != nil {
 		query = buildTimeCondition("job.start_time", filter.StartTime, query)
@@ -145,12 +155,8 @@ func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
 	if filter.Duration != nil {
 		query = buildIntCondition("job.duration", filter.Duration, query)
 	}
-	if filter.IsRunning != nil {
-		if *filter.IsRunning {
-			query = query.Where("job.job_state = 'running'")
-		} else {
-			query = query.Where("job.job_state = 'completed'")
-		}
+	if filter.JobState != nil {
+		query = query.Where("job.job_state IN ?", filter.JobState)
 	}
 	if filter.NumNodes != nil {
 		query = buildIntCondition("job.num_nodes", filter.NumNodes, query)

@@ -1,78 +1,102 @@
 type Job {
-  id:          ID!        # Database ID, unique
-  jobId:       String!    # ID given to the job by the cluster scheduler
-  userId:      String!    # Username
-  projectId:   String!    # Project
-  clusterId:   String!    # Name of the cluster this job was running on
-  startTime:   Time!      # RFC3339 formated string
-  duration:    Int!       # For running jobs, the time it has already run
-  numNodes:    Int!       # Number of nodes this job was running on
-  nodes:       [String!]! # List of hostnames
-  hasProfile:  Boolean!   # TODO: Could be removed?
-  state:       JobState!  # State of the job
-  tags:        [JobTag!]! # List of tags this job has
+  Id:               ID!             # Database ID, unique
+  JobId:            Int!            # ID given to the job by the cluster scheduler
+  User:             String!         # Username
+  Project:          String!         # Project
+  Cluster:          String!         # Name of the cluster this job was running on
+  StartTime:        Time!           # RFC3339 formated string
+  Duration:         Int!            # For running jobs, the time it has already run
+  NumNodes:         Int!            # Number of nodes this job was running on
+  NumHWThreads:     Int!
+  NumAcc:           Int!
+  SMT:              Int!
+  Exclusive:        Int!
+  Partition:        String!
+  ArrayJobId:       Int!
+  MonitoringStatus: Int!
+  State:            JobState!       # State of the job
+  Tags:             [JobTag!]!      # List of tags this job has
+  Resources:        [JobResource!]! # List of hosts/hwthreads/gpus/...
 
   # Will be null for running jobs.
-  loadAvg:     Float
-  memUsedMax:  Float
-  flopsAnyAvg: Float
-  memBwAvg:    Float
-  netBwAvg:    Float
-  fileBwAvg:   Float
+  LoadAvg:     Float
+  MemUsedMax:  Float
+  FlopsAnyAvg: Float
+  MemBwAvg:    Float
+  NetBwAvg:    Float
+  FileBwAvg:   Float
 }
 
+type JobResource {
+  Hostname:     String!
+  HWThreads:    [Int!]
+  Accelerators: [Accelerator!]
+}
+
+type Accelerator {
+  Id:    String!
+  Type:  String!
+  Model: String!
+}
+
 # TODO: Extend by more possible states?
 enum JobState {
   running
   completed
+  failed
+  canceled
+  stopped
+  timeout
 }
 
 type JobTag {
-  id:      ID!     # Database ID, unique
-  tagType: String! # Type
-  tagName: String! # Name
+  Id:      ID!     # Database ID, unique
+  TagType: String! # Type
+  TagName: String! # Name
 }
 
 type Cluster {
-  clusterID:       String!
-  processorType:   String!
-  socketsPerNode:  Int!
-  coresPerSocket:  Int!
-  threadsPerCore:  Int!
-  flopRateScalar:  Int!
-  flopRateSimd:    Int!
-  memoryBandwidth: Int!
-  metricConfig:    [MetricConfig!]!
-  filterRanges:    FilterRanges!
+  ClusterID:       String!
+  ProcessorType:   String!
+  SocketsPerNode:  Int!
+  CoresPerSocket:  Int!
+  ThreadsPerCore:  Int!
+  FlopRateScalar:  Int!
+  FlopRateSimd:    Int!
+  MemoryBandwidth: Int!
+  MetricConfig:    [MetricConfig!]!
+  FilterRanges:    FilterRanges!
 }
 
 type MetricConfig {
-  name:       String!
-  unit:       String!
-  sampletime: Int!
-  peak:       Int!
-  normal:     Int!
-  caution:    Int!
-  alert:      Int!
+  Name:     String!
+  Unit:     String!
+  Timestep: Int!
+  Peak:     Int!
+  Normal:   Int!
+  Caution:  Int!
+  Alert:    Int!
+  Scope:    String!
 }
 
 type JobMetric {
-  unit:     String!
-  scope:    JobMetricScope!
-  timestep: Int!
-  series:   [JobMetricSeries!]!
+  Unit:     String!
+  Scope:    JobMetricScope!
+  Timestep: Int!
+  Series:   [JobMetricSeries!]!
 }
 
 type JobMetricSeries {
-  node_id:    String!
-  statistics: JobMetricStatistics
-  data:       [NullableFloat!]!
+  Hostname:   String!
+  Id:         Int
+  Statistics: JobMetricStatistics
+  Data:       [NullableFloat!]!
 }
 
 type JobMetricStatistics {
-  avg: Float!
-  min: Float!
-  max: Float!
+  Avg: Float!
+  Min: Float!
+  Max: Float!
 }
 
 type JobMetricWithName {
@@ -141,13 +165,13 @@ type FilterRanges {
 input JobFilter {
   tags:        [ID!]
   jobId:       StringInput
-  userId:      StringInput
-  projectId:   StringInput
-  clusterId:   StringInput
+  user:        StringInput
+  project:     StringInput
+  cluster:     StringInput
   duration:    IntRange
   numNodes:    IntRange
   startTime:   TimeRange
-  isRunning:   Boolean
+  jobState:    [JobState!]
   flopsAnyAvg: FloatRange
   memBwAvg:    FloatRange
   loadAvg:     FloatRange
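
Clients that keep their GraphQL documents as strings have to follow the renames too. A sketch of a query against the updated schema; the jobs root field and the eq matcher on StringInput are assumptions, since neither the Query type nor StringInput appears in this excerpt:

package main

import "fmt"

// Assumed: a `jobs` root field taking a JobFilter, and StringInput
// supporting `eq`; neither definition is visible in this diff.
const jobsQuery = `
query {
  jobs(filter: { user: { eq: "lou" }, jobState: [failed, timeout] }) {
    Id
    JobId
    Cluster
    State
    Resources {
      Hostname
      HWThreads
    }
  }
}`

func main() {
	fmt.Println(jobsQuery)
}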


@@ -15,9 +15,14 @@ import (
 	"github.com/ClusterCockpit/cc-jobarchive/graph/generated"
 	"github.com/ClusterCockpit/cc-jobarchive/graph/model"
 	"github.com/ClusterCockpit/cc-jobarchive/metricdata"
+	"github.com/ClusterCockpit/cc-jobarchive/schema"
 	sq "github.com/Masterminds/squirrel"
 )
 
+func (r *acceleratorResolver) ID(ctx context.Context, obj *schema.Accelerator) (string, error) {
+	panic(fmt.Errorf("not implemented"))
+}
+
 func (r *jobResolver) Tags(ctx context.Context, obj *model.Job) ([]*model.JobTag, error) {
 	query := sq.
 		Select("tag.id", "tag.tag_type", "tag.tag_name").
@@ -232,6 +237,9 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
 	return res, nil
 }
 
+// Accelerator returns generated.AcceleratorResolver implementation.
+func (r *Resolver) Accelerator() generated.AcceleratorResolver { return &acceleratorResolver{r} }
+
 // Job returns generated.JobResolver implementation.
 func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
@@ -241,6 +249,7 @@ func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
 // Query returns generated.QueryResolver implementation.
 func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
 
+type acceleratorResolver struct{ *Resolver }
 type jobResolver struct{ *Resolver }
 type mutationResolver struct{ *Resolver }
 type queryResolver struct{ *Resolver }
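
The new acceleratorResolver is generated presumably because gqlgen could not map the GraphQL Accelerator.Id field onto schema.Accelerator directly; the commit leaves it panicking. One plausible body for the stub, assuming schema.Accelerator carries a string ID field (not shown in this diff):

// Hypothetical replacement for the panic stub above; the field name
// on schema.Accelerator is an assumption.
func (r *acceleratorResolver) ID(ctx context.Context, obj *schema.Accelerator) (string, error) {
	return obj.ID, nil
}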