Merge branch 'master' into add-influxdb2-client

Christoph Kluge
2022-03-22 11:10:32 +01:00
33 changed files with 2194 additions and 955 deletions

File diff suppressed because it is too large


@@ -9,7 +9,7 @@ type Cluster struct {
Name string `json:"name"`
MetricConfig []*MetricConfig `json:"metricConfig"`
FilterRanges *FilterRanges `json:"filterRanges"`
Partitions []*Partition `json:"partitions"`
SubClusters []*SubCluster `json:"subClusters"`
// NOT part of the GraphQL API. This has to be a JSON object with a field `"kind"`.
// All other fields depend on that kind (e.g. "cc-metric-store", "influxdb-v2").
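
The comment above describes a kind-discriminated JSON object. A minimal sketch of how such a config could be decoded in two passes is shown below; apart from the "kind" discriminator and the two kind values named in the comment, all field names and values are invented for illustration and are not part of this commit.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Example config: only the "kind" field and the two kind values come from
	// the comment above; the "url" field is an invented placeholder.
	raw := []byte(`{"kind": "influxdb-v2", "url": "http://localhost:8086"}`)

	// First pass: read only the discriminator.
	var head struct {
		Kind string `json:"kind"`
	}
	if err := json.Unmarshal(raw, &head); err != nil {
		panic(err)
	}

	// A second pass would decode the kind-specific fields into the matching type.
	switch head.Kind {
	case "cc-metric-store", "influxdb-v2":
		fmt.Println("metric data repository kind:", head.Kind)
	default:
		fmt.Println("unknown kind:", head.Kind)
	}
}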


@@ -33,6 +33,11 @@ type FloatRange struct {
To float64 `json:"to"`
}
type Footprints struct {
Nodehours []schema.Float `json:"nodehours"`
Metrics []*MetricFootprints `json:"metrics"`
}
type HistoPoint struct {
Count int `json:"count"`
Value int `json:"value"`
@@ -92,19 +97,20 @@ type JobsStatistics struct {
}
type MetricConfig struct {
Name string `json:"name"`
Unit string `json:"unit"`
Scope schema.MetricScope `json:"scope"`
Timestep int `json:"timestep"`
Peak float64 `json:"peak"`
Normal float64 `json:"normal"`
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
Name string `json:"name"`
Unit string `json:"unit"`
Scope schema.MetricScope `json:"scope"`
Aggregation *string `json:"aggregation"`
Timestep int `json:"timestep"`
Peak float64 `json:"peak"`
Normal float64 `json:"normal"`
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
}
type MetricFootprints struct {
Name string `json:"name"`
Footprints []schema.Float `json:"footprints"`
Metric string `json:"metric"`
Data []schema.Float `json:"data"`
}
type NodeMetrics struct {
@@ -122,8 +128,16 @@ type PageRequest struct {
Page int `json:"page"`
}
type Partition struct {
type StringInput struct {
Eq *string `json:"eq"`
Contains *string `json:"contains"`
StartsWith *string `json:"startsWith"`
EndsWith *string `json:"endsWith"`
}
type SubCluster struct {
Name string `json:"name"`
Nodes string `json:"nodes"`
ProcessorType string `json:"processorType"`
SocketsPerNode int `json:"socketsPerNode"`
CoresPerSocket int `json:"coresPerSocket"`
@@ -134,13 +148,6 @@ type Partition struct {
Topology *Topology `json:"topology"`
}
type StringInput struct {
Eq *string `json:"eq"`
Contains *string `json:"contains"`
StartsWith *string `json:"startsWith"`
EndsWith *string `json:"endsWith"`
}
type TimeRange struct {
From *time.Time `json:"from"`
To *time.Time `json:"to"`
@@ -160,6 +167,12 @@ type Topology struct {
Accelerators []*Accelerator `json:"accelerators"`
}
type User struct {
Username string `json:"username"`
Name string `json:"name"`
Email string `json:"email"`
}
type Aggregate string
const (


@@ -11,8 +11,10 @@ type Job {
user: String!
project: String!
cluster: String!
subCluster: String!
startTime: Time!
duration: Int!
walltime: Int!
numNodes: Int!
numHWThreads: Int!
numAcc: Int!
@@ -22,20 +24,24 @@ type Job {
arrayJobId: Int!
monitoringStatus: Int!
state: JobState!
metaData: Any
tags: [Tag!]!
resources: [Resource!]!
metaData: Any
userData: User
}
type Cluster {
name: String!
partitions: [String!]! # Slurm partitions
metricConfig: [MetricConfig!]!
filterRanges: FilterRanges!
partitions: [Partition!]!
subClusters: [SubCluster!]! # Hardware partitions/subclusters
}
type Partition {
type SubCluster {
name: String!
nodes: String!
processorType: String!
socketsPerNode: Int!
coresPerSocket: Int!
@@ -62,14 +68,15 @@ type Accelerator {
}
type MetricConfig {
name: String!
unit: String!
scope: MetricScope!
timestep: Int!
peak: Float!
normal: Float!
caution: Float!
alert: Float!
name: String!
unit: String!
scope: MetricScope!
aggregation: String
timestep: Int!
peak: Float!
normal: Float!
caution: Float!
alert: Float!
}
type Tag {
@@ -118,8 +125,13 @@ type StatsSeries {
}
type MetricFootprints {
name: String!
footprints: [NullableFloat!]!
metric: String!
data: [NullableFloat!]!
}
type Footprints {
nodehours: [NullableFloat!]!
metrics: [MetricFootprints!]!
}
enum Aggregate { USER, PROJECT, CLUSTER }
@@ -134,13 +146,21 @@ type Count {
count: Int!
}
type User {
username: String!
name: String!
email: String!
}
type Query {
clusters: [Cluster!]! # List of all clusters
tags: [Tag!]! # List of all tags
user(username: String!): User
job(id: ID!): Job
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): [MetricFootprints]!
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]!
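
For orientation, a client query against the reworked jobsFootprints field could look like the sketch below, embedded as a Go string so it can be sent with any GraphQL client. Only the field shape follows the schema above; the metric names are made up for the example.

package main

import "fmt"

func main() {
	// Shape of the reworked jobsFootprints query: it now returns a single
	// Footprints object with nodehours plus per-metric data instead of a
	// bare list of MetricFootprints. The metric names are example values.
	const query = `
	query {
	  jobsFootprints(metrics: ["flops_any", "mem_bw"]) {
	    nodehours
	    metrics {
	      metric
	      data
	    }
	  }
	}`
	fmt.Println(query)
}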


@@ -18,14 +18,22 @@ import (
"github.com/ClusterCockpit/cc-backend/schema"
)
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
return r.Repo.FetchMetadata(obj)
func (r *clusterResolver) Partitions(ctx context.Context, obj *model.Cluster) ([]string, error) {
return r.Repo.Partitions(obj.Name)
}
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(&obj.ID)
}
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
return r.Repo.FetchMetadata(obj)
}
func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.User, error) {
return auth.FetchUser(ctx, r.DB, obj.User)
}
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name)
if err != nil {
@@ -98,6 +106,10 @@ func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
return r.Repo.GetTags(nil)
}
func (r *queryResolver) User(ctx context.Context, username string) (*model.User, error) {
return auth.FetchUser(ctx, r.DB, username)
}
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
numericId, err := strconv.ParseInt(id, 10, 64)
if err != nil {
@@ -144,7 +156,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
return res, err
}
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.MetricFootprints, error) {
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
return r.jobsFootprints(ctx, filter, metrics)
}
@@ -204,7 +216,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, partiti
}
if metrics == nil {
for _, mc := range config.GetClusterConfig(cluster).MetricConfig {
for _, mc := range config.GetCluster(cluster).MetricConfig {
metrics = append(metrics, mc.Name)
}
}
@@ -236,6 +248,9 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, partiti
return nodeMetrics, nil
}
// Cluster returns generated.ClusterResolver implementation.
func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver{r} }
// Job returns generated.JobResolver implementation.
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
@@ -245,6 +260,7 @@ func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResol
// Query returns generated.QueryResolver implementation.
func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
type clusterResolver struct{ *Resolver }
type jobResolver struct{ *Resolver }
type mutationResolver struct{ *Resolver }
type queryResolver struct{ *Resolver }


@@ -32,8 +32,8 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
for _, cluster := range config.Clusters {
for _, partition := range cluster.Partitions {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", partition.SocketsPerNode, partition.CoresPerSocket)
for _, subcluster := range cluster.SubClusters {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", subcluster.SocketsPerNode, subcluster.CoresPerSocket)
var query sq.SelectBuilder
if groupBy == nil {
query = sq.Select(
@@ -54,7 +54,7 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
query = query.
Where("job.cluster = ?", cluster.Name).
Where("job.partition = ?", partition.Name)
Where("job.subcluster = ?", subcluster.Name)
query = repository.SecurityCheck(ctx, query)
for _, f := range filter {
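
As a sanity check on the corehoursCol expression above, the per-job arithmetic it aggregates can be written out directly; the concrete numbers below are invented for illustration.

package main

import "fmt"

func main() {
	// Mirrors SUM(job.duration * job.num_nodes * socketsPerNode * coresPerSocket) / 3600
	// for a single job; all values are made-up example inputs.
	duration := 7200     // job duration in seconds (the expression divides by 3600 to get hours)
	numNodes := 4        // nodes allocated to the job
	socketsPerNode := 2  // from the subcluster description
	coresPerSocket := 16 // from the subcluster description

	coreHours := duration * numNodes * socketsPerNode * coresPerSocket / 3600
	fmt.Println(coreHours) // prints 256
}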
@@ -254,7 +254,7 @@ func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilte
}
// Helper function for the jobsFootprints GraphQL query placed here so that schema.resolvers.go is not too full.
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.MetricFootprints, error) {
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
return nil, err
@@ -268,19 +268,25 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
avgs[i] = make([]schema.Float, 0, len(jobs))
}
nodehours := make([]schema.Float, 0, len(jobs))
for _, job := range jobs {
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
return nil, err
}
nodehours = append(nodehours, schema.Float(float64(job.Duration)/60.0*float64(job.NumNodes)))
}
res := make([]*model.MetricFootprints, len(avgs))
for i, arr := range avgs {
res[i] = &model.MetricFootprints{
Name: metrics[i],
Footprints: arr,
Metric: metrics[i],
Data: arr,
}
}
return res, nil
return &model.Footprints{
Nodehours: nodehours,
Metrics: res,
}, nil
}