BC: new schemas for basically everything

Author: Lou Knauer
Date:   2021-12-16 13:17:48 +01:00
Parent: 7fcc39a144
Commit: 89333666b3

14 changed files with 1631 additions and 549 deletions

File diff suppressed because it is too large


@@ -38,36 +38,42 @@ type IntRangeOutput struct {
 }
 
 type Job struct {
-	ID          string    `json:"id"`
-	JobID       string    `json:"jobId"`
-	UserID      string    `json:"userId"`
-	ProjectID   string    `json:"projectId"`
-	ClusterID   string    `json:"clusterId"`
-	StartTime   time.Time `json:"startTime"`
-	Duration    int       `json:"duration"`
-	NumNodes    int       `json:"numNodes"`
-	Nodes       []string  `json:"nodes"`
-	HasProfile  bool      `json:"hasProfile"`
-	State       JobState  `json:"state"`
-	Tags        []*JobTag `json:"tags"`
-	LoadAvg     *float64  `json:"loadAvg"`
-	MemUsedMax  *float64  `json:"memUsedMax"`
-	FlopsAnyAvg *float64  `json:"flopsAnyAvg"`
-	MemBwAvg    *float64  `json:"memBwAvg"`
-	NetBwAvg    *float64  `json:"netBwAvg"`
-	FileBwAvg   *float64  `json:"fileBwAvg"`
+	ID               string                `json:"Id"`
+	JobID            int                   `json:"JobId"`
+	User             string                `json:"User"`
+	Project          string                `json:"Project"`
+	Cluster          string                `json:"Cluster"`
+	StartTime        time.Time             `json:"StartTime"`
+	Duration         int                   `json:"Duration"`
+	NumNodes         int                   `json:"NumNodes"`
+	NumHWThreads     int                   `json:"NumHWThreads"`
+	NumAcc           int                   `json:"NumAcc"`
+	Smt              int                   `json:"SMT"`
+	Exclusive        int                   `json:"Exclusive"`
+	Partition        string                `json:"Partition"`
+	ArrayJobID       int                   `json:"ArrayJobId"`
+	MonitoringStatus int                   `json:"MonitoringStatus"`
+	State            JobState              `json:"State"`
+	Tags             []*JobTag             `json:"Tags"`
+	Resources        []*schema.JobResource `json:"Resources"`
+	LoadAvg          *float64              `json:"LoadAvg"`
+	MemUsedMax       *float64              `json:"MemUsedMax"`
+	FlopsAnyAvg      *float64              `json:"FlopsAnyAvg"`
+	MemBwAvg         *float64              `json:"MemBwAvg"`
+	NetBwAvg         *float64              `json:"NetBwAvg"`
+	FileBwAvg        *float64              `json:"FileBwAvg"`
 }
 
 type JobFilter struct {
 	Tags        []string     `json:"tags"`
 	JobID       *StringInput `json:"jobId"`
-	UserID      *StringInput `json:"userId"`
-	ProjectID   *StringInput `json:"projectId"`
-	ClusterID   *StringInput `json:"clusterId"`
+	User        *StringInput `json:"user"`
+	Project     *StringInput `json:"project"`
+	Cluster     *StringInput `json:"cluster"`
 	Duration    *IntRange    `json:"duration"`
 	NumNodes    *IntRange    `json:"numNodes"`
 	StartTime   *TimeRange   `json:"startTime"`
-	IsRunning   *bool        `json:"isRunning"`
+	JobState    []JobState   `json:"jobState"`
 	FlopsAnyAvg *FloatRange  `json:"flopsAnyAvg"`
 	MemBwAvg    *FloatRange  `json:"memBwAvg"`
 	LoadAvg     *FloatRange  `json:"loadAvg"`
@@ -97,13 +103,14 @@ type JobsStatistics struct {
 }
 
 type MetricConfig struct {
-	Name       string `json:"name"`
-	Unit       string `json:"unit"`
-	Sampletime int    `json:"sampletime"`
-	Peak       int    `json:"peak"`
-	Normal     int    `json:"normal"`
-	Caution    int    `json:"caution"`
-	Alert      int    `json:"alert"`
+	Name     string `json:"Name"`
+	Unit     string `json:"Unit"`
+	Timestep int    `json:"Timestep"`
+	Peak     int    `json:"Peak"`
+	Normal   int    `json:"Normal"`
+	Caution  int    `json:"Caution"`
+	Alert    int    `json:"Alert"`
+	Scope    string `json:"Scope"`
 }
 
 type MetricFootprints struct {
@@ -196,16 +203,24 @@ type JobState string
 const (
 	JobStateRunning   JobState = "running"
 	JobStateCompleted JobState = "completed"
+	JobStateFailed    JobState = "failed"
+	JobStateCanceled  JobState = "canceled"
+	JobStateStopped   JobState = "stopped"
+	JobStateTimeout   JobState = "timeout"
 )
 
 var AllJobState = []JobState{
 	JobStateRunning,
 	JobStateCompleted,
+	JobStateFailed,
+	JobStateCanceled,
+	JobStateStopped,
+	JobStateTimeout,
 }
 
 func (e JobState) IsValid() bool {
 	switch e {
-	case JobStateRunning, JobStateCompleted:
+	case JobStateRunning, JobStateCompleted, JobStateFailed, JobStateCanceled, JobStateStopped, JobStateTimeout:
 		return true
 	}
 	return false
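
The renames above are the breaking change the commit title announces: every JSON tag moves from camelCase to PascalCase, and JobID additionally changes type from string to int, so existing API clients will stop seeing these fields. A minimal sketch of the break (not part of the commit; the two-field structs are hypothetical excerpts of the model):

package main

import (
	"encoding/json"
	"fmt"
)

// Hypothetical excerpts of the old and new model, for illustration only.
type OldJob struct {
	JobID string `json:"jobId"`
	User  string `json:"userId"`
}

type NewJob struct {
	JobID int    `json:"JobId"` // key and type both changed
	User  string `json:"User"`
}

func main() {
	before, _ := json.Marshal(OldJob{JobID: "1337", User: "lou"})
	after, _ := json.Marshal(NewJob{JobID: 1337, User: "lou"})
	fmt.Println(string(before)) // {"jobId":"1337","userId":"lou"}
	fmt.Println(string(after))  // {"JobId":1337,"User":"lou"}
}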


@@ -2,6 +2,7 @@ package graph
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"regexp"
@@ -22,7 +23,12 @@ type Resolver struct {
 	DB *sqlx.DB
 }
 
-var JobTableCols []string = []string{"id", "job_id", "user_id", "project_id", "cluster_id", "start_time", "duration", "job_state", "num_nodes", "node_list", "flops_any_avg", "mem_bw_avg", "net_bw_avg", "file_bw_avg", "load_avg"}
+var JobTableCols []string = []string{
+	"id", "job_id", "cluster", "start_time",
+	"user", "project", "partition", "array_job_id", "duration", "job_state", "resources",
+	"num_nodes", "num_hwthreads", "num_acc", "smt", "exclusive", "monitoring_status",
+	"load_avg", "mem_used_max", "flops_any_avg", "mem_bw_avg", "net_bw_avg", "file_bw_avg",
+}
 
 type Scannable interface {
 	Scan(dest ...interface{}) error
@@ -30,13 +36,18 @@ type Scannable interface {
 }
 
 // Helper function for scanning jobs with the `jobTableCols` columns selected.
 func ScanJob(row Scannable) (*model.Job, error) {
-	job := &model.Job{HasProfile: true}
+	job := &model.Job{}
 
-	var nodeList string
+	var rawResources []byte
 	if err := row.Scan(
-		&job.ID, &job.JobID, &job.UserID, &job.ProjectID, &job.ClusterID,
-		&job.StartTime, &job.Duration, &job.State, &job.NumNodes, &nodeList,
-		&job.FlopsAnyAvg, &job.MemBwAvg, &job.NetBwAvg, &job.FileBwAvg, &job.LoadAvg); err != nil {
+		&job.ID, &job.JobID, &job.Cluster, &job.StartTime,
+		&job.User, &job.Project, &job.Partition, &job.ArrayJobID, &job.Duration, &job.State, &rawResources,
+		&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Smt, &job.Exclusive, &job.MonitoringStatus,
+		&job.LoadAvg, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.NetBwAvg, &job.FileBwAvg); err != nil {
 		return nil, err
 	}
+
+	if err := json.Unmarshal(rawResources, &job.Resources); err != nil {
+		return nil, err
+	}
@@ -44,7 +55,6 @@ func ScanJob(row Scannable) (*model.Job, error) {
 		job.Duration = int(time.Since(job.StartTime).Seconds())
 	}
 
-	job.Nodes = strings.Split(nodeList, ",")
 	return job, nil
 }
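
ScanJob now reads the resources column as raw JSON instead of splitting a comma-separated node_list. A rough sketch of that decode step in isolation; the Go shape of schema.JobResource is assumed here from the JobResource GraphQL type added further below, since the schema package itself is not part of this diff:

package main

import (
	"encoding/json"
	"fmt"
)

// Assumed shapes, mirroring the new GraphQL JobResource/Accelerator types;
// the real definitions live in the schema package, which this diff omits.
type Accelerator struct {
	ID    string `json:"Id"`
	Type  string `json:"Type"`
	Model string `json:"Model"`
}

type JobResource struct {
	Hostname     string         `json:"Hostname"`
	HWThreads    []int          `json:"HWThreads,omitempty"`
	Accelerators []*Accelerator `json:"Accelerators,omitempty"`
}

func main() {
	// Example of what a row's resources column could contain.
	raw := []byte(`[{"Hostname":"node001","HWThreads":[0,1,2,3]}]`)

	var resources []*JobResource
	if err := json.Unmarshal(raw, &resources); err != nil {
		panic(err)
	}
	fmt.Println(resources[0].Hostname, resources[0].HWThreads) // node001 [0 1 2 3]
}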
@@ -130,14 +140,14 @@ func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
 	if filter.JobID != nil {
 		query = buildStringCondition("job.job_id", filter.JobID, query)
 	}
-	if filter.UserID != nil {
-		query = buildStringCondition("job.user_id", filter.UserID, query)
+	if filter.User != nil {
+		query = buildStringCondition("job.user", filter.User, query)
 	}
-	if filter.ProjectID != nil {
-		query = buildStringCondition("job.project_id", filter.ProjectID, query)
+	if filter.Project != nil {
+		query = buildStringCondition("job.project", filter.Project, query)
 	}
-	if filter.ClusterID != nil {
-		query = buildStringCondition("job.cluster_id", filter.ClusterID, query)
+	if filter.Cluster != nil {
+		query = buildStringCondition("job.cluster", filter.Cluster, query)
 	}
 	if filter.StartTime != nil {
 		query = buildTimeCondition("job.start_time", filter.StartTime, query)
@@ -145,12 +155,8 @@ func buildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
 	if filter.Duration != nil {
 		query = buildIntCondition("job.duration", filter.Duration, query)
 	}
-	if filter.IsRunning != nil {
-		if *filter.IsRunning {
-			query = query.Where("job.job_state = 'running'")
-		} else {
-			query = query.Where("job.job_state = 'completed'")
-		}
+	if filter.JobState != nil {
+		query = query.Where("job.job_state IN ?", filter.JobState)
 	}
 	if filter.NumNodes != nil {
 		query = buildIntCondition("job.num_nodes", filter.NumNodes, query)

@@ -1,78 +1,102 @@
 type Job {
-  id:          ID!        # Database ID, unique
-  jobId:       String!    # ID given to the job by the cluster scheduler
-  userId:      String!    # Username
-  projectId:   String!    # Project
-  clusterId:   String!    # Name of the cluster this job was running on
-  startTime:   Time!      # RFC3339 formated string
-  duration:    Int!       # For running jobs, the time it has already run
-  numNodes:    Int!       # Number of nodes this job was running on
-  nodes:       [String!]! # List of hostnames
-  hasProfile:  Boolean!   # TODO: Could be removed?
-  state:       JobState!  # State of the job
-  tags:        [JobTag!]! # List of tags this job has
+  Id:               ID!             # Database ID, unique
+  JobId:            Int!            # ID given to the job by the cluster scheduler
+  User:             String!         # Username
+  Project:          String!         # Project
+  Cluster:          String!         # Name of the cluster this job was running on
+  StartTime:        Time!           # RFC3339 formated string
+  Duration:         Int!            # For running jobs, the time it has already run
+  NumNodes:         Int!            # Number of nodes this job was running on
+  NumHWThreads:     Int!
+  NumAcc:           Int!
+  SMT:              Int!
+  Exclusive:        Int!
+  Partition:        String!
+  ArrayJobId:       Int!
+  MonitoringStatus: Int!
+  State:            JobState!       # State of the job
+  Tags:             [JobTag!]!      # List of tags this job has
+  Resources:        [JobResource!]! # List of hosts/hwthreads/gpus/...
 
   # Will be null for running jobs.
-  loadAvg:     Float
-  memUsedMax:  Float
-  flopsAnyAvg: Float
-  memBwAvg:    Float
-  netBwAvg:    Float
-  fileBwAvg:   Float
+  LoadAvg:     Float
+  MemUsedMax:  Float
+  FlopsAnyAvg: Float
+  MemBwAvg:    Float
+  NetBwAvg:    Float
+  FileBwAvg:   Float
 }
 
+type JobResource {
+  Hostname:     String!
+  HWThreads:    [Int!]
+  Accelerators: [Accelerator!]
+}
+
+type Accelerator {
+  Id:    String!
+  Type:  String!
+  Model: String!
+}
+
 # TODO: Extend by more possible states?
 enum JobState {
   running
   completed
+  failed
+  canceled
+  stopped
+  timeout
 }
 
 type JobTag {
-  id:      ID!     # Database ID, unique
-  tagType: String! # Type
-  tagName: String! # Name
+  Id:      ID!     # Database ID, unique
+  TagType: String! # Type
+  TagName: String! # Name
 }
 
 type Cluster {
-  clusterID:       String!
-  processorType:   String!
-  socketsPerNode:  Int!
-  coresPerSocket:  Int!
-  threadsPerCore:  Int!
-  flopRateScalar:  Int!
-  flopRateSimd:    Int!
-  memoryBandwidth: Int!
-  metricConfig:    [MetricConfig!]!
-  filterRanges:    FilterRanges!
+  ClusterID:       String!
+  ProcessorType:   String!
+  SocketsPerNode:  Int!
+  CoresPerSocket:  Int!
+  ThreadsPerCore:  Int!
+  FlopRateScalar:  Int!
+  FlopRateSimd:    Int!
+  MemoryBandwidth: Int!
+  MetricConfig:    [MetricConfig!]!
+  FilterRanges:    FilterRanges!
 }
 
 type MetricConfig {
-  name:       String!
-  unit:       String!
-  sampletime: Int!
-  peak:       Int!
-  normal:     Int!
-  caution:    Int!
-  alert:      Int!
+  Name:     String!
+  Unit:     String!
+  Timestep: Int!
+  Peak:     Int!
+  Normal:   Int!
+  Caution:  Int!
+  Alert:    Int!
+  Scope:    String!
 }
 
 type JobMetric {
-  unit:     String!
-  scope:    JobMetricScope!
-  timestep: Int!
-  series:   [JobMetricSeries!]!
+  Unit:     String!
+  Scope:    JobMetricScope!
+  Timestep: Int!
+  Series:   [JobMetricSeries!]!
 }
 
 type JobMetricSeries {
-  node_id:    String!
-  statistics: JobMetricStatistics
-  data:       [NullableFloat!]!
+  Hostname:   String!
+  Id:         Int
+  Statistics: JobMetricStatistics
+  Data:       [NullableFloat!]!
 }
 
 type JobMetricStatistics {
-  avg: Float!
-  min: Float!
-  max: Float!
+  Avg: Float!
+  Min: Float!
+  Max: Float!
 }
 
 type JobMetricWithName {
@@ -141,13 +165,13 @@ type FilterRanges {
 input JobFilter {
   tags:        [ID!]
   jobId:       StringInput
-  userId:      StringInput
-  projectId:   StringInput
-  clusterId:   StringInput
+  user:        StringInput
+  project:     StringInput
+  cluster:     StringInput
   duration:    IntRange
   numNodes:    IntRange
   startTime:   TimeRange
-  isRunning:   Boolean
+  jobState:    [JobState!]
   flopsAnyAvg: FloatRange
   memBwAvg:    FloatRange
   loadAvg:     FloatRange
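
Clients that keep their GraphQL documents as strings have to follow the renames too. A sketch of a query against the updated schema; the jobs root field and the eq matcher on StringInput are assumptions, since neither the Query type nor StringInput appears in this excerpt:

package main

import "fmt"

// Assumed: a `jobs` root field taking a JobFilter, and StringInput
// supporting `eq`; neither definition is visible in this diff.
const jobsQuery = `
query {
  jobs(filter: { user: { eq: "lou" }, jobState: [failed, timeout] }) {
    Id
    JobId
    Cluster
    State
    Resources {
      Hostname
      HWThreads
    }
  }
}`

func main() {
	fmt.Println(jobsQuery)
}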


@@ -15,9 +15,14 @@ import (
 	"github.com/ClusterCockpit/cc-jobarchive/graph/generated"
 	"github.com/ClusterCockpit/cc-jobarchive/graph/model"
 	"github.com/ClusterCockpit/cc-jobarchive/metricdata"
+	"github.com/ClusterCockpit/cc-jobarchive/schema"
 	sq "github.com/Masterminds/squirrel"
 )
 
+func (r *acceleratorResolver) ID(ctx context.Context, obj *schema.Accelerator) (string, error) {
+	panic(fmt.Errorf("not implemented"))
+}
+
 func (r *jobResolver) Tags(ctx context.Context, obj *model.Job) ([]*model.JobTag, error) {
 	query := sq.
 		Select("tag.id", "tag.tag_type", "tag.tag_name").
@@ -232,6 +237,9 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
 	return res, nil
 }
 
+// Accelerator returns generated.AcceleratorResolver implementation.
+func (r *Resolver) Accelerator() generated.AcceleratorResolver { return &acceleratorResolver{r} }
+
 // Job returns generated.JobResolver implementation.
 func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
@@ -241,6 +249,7 @@ func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
 // Query returns generated.QueryResolver implementation.
 func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
 
+type acceleratorResolver struct{ *Resolver }
 type jobResolver struct{ *Resolver }
 type mutationResolver struct{ *Resolver }
 type queryResolver struct{ *Resolver }
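
The new acceleratorResolver is generated presumably because gqlgen could not map the GraphQL Accelerator.Id field onto schema.Accelerator directly; the commit leaves it panicking. One plausible body for the stub, assuming schema.Accelerator carries a string ID field (not shown in this diff):

// Hypothetical replacement for the panic stub above; the field name
// on schema.Accelerator is an assumption.
func (r *acceleratorResolver) ID(ctx context.Context, obj *schema.Accelerator) (string, error) {
	return obj.ID, nil
}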