mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-07-25 05:36:07 +02:00
Merge branch 'master' into add-influxdb2-client
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -9,7 +9,7 @@ type Cluster struct {
|
||||
Name string `json:"name"`
|
||||
MetricConfig []*MetricConfig `json:"metricConfig"`
|
||||
FilterRanges *FilterRanges `json:"filterRanges"`
|
||||
Partitions []*Partition `json:"partitions"`
|
||||
SubClusters []*SubCluster `json:"subClusters"`
|
||||
|
||||
// NOT part of the GraphQL API. This has to be a JSON object with a field `"kind"`.
|
||||
// All other fields depend on that kind (e.g. "cc-metric-store", "influxdb-v2").
|
||||
|
@@ -33,6 +33,11 @@ type FloatRange struct {
|
||||
To float64 `json:"to"`
|
||||
}
|
||||
|
||||
type Footprints struct {
|
||||
Nodehours []schema.Float `json:"nodehours"`
|
||||
Metrics []*MetricFootprints `json:"metrics"`
|
||||
}
|
||||
|
||||
type HistoPoint struct {
|
||||
Count int `json:"count"`
|
||||
Value int `json:"value"`
|
||||
@@ -92,19 +97,20 @@ type JobsStatistics struct {
|
||||
}
|
||||
|
||||
type MetricConfig struct {
|
||||
Name string `json:"name"`
|
||||
Unit string `json:"unit"`
|
||||
Scope schema.MetricScope `json:"scope"`
|
||||
Timestep int `json:"timestep"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
Name string `json:"name"`
|
||||
Unit string `json:"unit"`
|
||||
Scope schema.MetricScope `json:"scope"`
|
||||
Aggregation *string `json:"aggregation"`
|
||||
Timestep int `json:"timestep"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
}
|
||||
|
||||
type MetricFootprints struct {
|
||||
Name string `json:"name"`
|
||||
Footprints []schema.Float `json:"footprints"`
|
||||
Metric string `json:"metric"`
|
||||
Data []schema.Float `json:"data"`
|
||||
}
|
||||
|
||||
type NodeMetrics struct {
|
||||
@@ -122,8 +128,16 @@ type PageRequest struct {
|
||||
Page int `json:"page"`
|
||||
}
|
||||
|
||||
type Partition struct {
|
||||
type StringInput struct {
|
||||
Eq *string `json:"eq"`
|
||||
Contains *string `json:"contains"`
|
||||
StartsWith *string `json:"startsWith"`
|
||||
EndsWith *string `json:"endsWith"`
|
||||
}
|
||||
|
||||
type SubCluster struct {
|
||||
Name string `json:"name"`
|
||||
Nodes string `json:"nodes"`
|
||||
ProcessorType string `json:"processorType"`
|
||||
SocketsPerNode int `json:"socketsPerNode"`
|
||||
CoresPerSocket int `json:"coresPerSocket"`
|
||||
@@ -134,13 +148,6 @@ type Partition struct {
|
||||
Topology *Topology `json:"topology"`
|
||||
}
|
||||
|
||||
type StringInput struct {
|
||||
Eq *string `json:"eq"`
|
||||
Contains *string `json:"contains"`
|
||||
StartsWith *string `json:"startsWith"`
|
||||
EndsWith *string `json:"endsWith"`
|
||||
}
|
||||
|
||||
type TimeRange struct {
|
||||
From *time.Time `json:"from"`
|
||||
To *time.Time `json:"to"`
|
||||
@@ -160,6 +167,12 @@ type Topology struct {
|
||||
Accelerators []*Accelerator `json:"accelerators"`
|
||||
}
|
||||
|
||||
type User struct {
|
||||
Username string `json:"username"`
|
||||
Name string `json:"name"`
|
||||
Email string `json:"email"`
|
||||
}
|
||||
|
||||
type Aggregate string
|
||||
|
||||
const (
|
||||
|
@@ -11,8 +11,10 @@ type Job {
|
||||
user: String!
|
||||
project: String!
|
||||
cluster: String!
|
||||
subCluster: String!
|
||||
startTime: Time!
|
||||
duration: Int!
|
||||
walltime: Int!
|
||||
numNodes: Int!
|
||||
numHWThreads: Int!
|
||||
numAcc: Int!
|
||||
@@ -22,20 +24,24 @@ type Job {
|
||||
arrayJobId: Int!
|
||||
monitoringStatus: Int!
|
||||
state: JobState!
|
||||
metaData: Any
|
||||
tags: [Tag!]!
|
||||
resources: [Resource!]!
|
||||
|
||||
metaData: Any
|
||||
userData: User
|
||||
}
|
||||
|
||||
type Cluster {
|
||||
name: String!
|
||||
partitions: [String!]! # Slurm partitions
|
||||
metricConfig: [MetricConfig!]!
|
||||
filterRanges: FilterRanges!
|
||||
partitions: [Partition!]!
|
||||
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
||||
}
|
||||
|
||||
type Partition {
|
||||
type SubCluster {
|
||||
name: String!
|
||||
nodes: String!
|
||||
processorType: String!
|
||||
socketsPerNode: Int!
|
||||
coresPerSocket: Int!
|
||||
@@ -62,14 +68,15 @@ type Accelerator {
|
||||
}
|
||||
|
||||
type MetricConfig {
|
||||
name: String!
|
||||
unit: String!
|
||||
scope: MetricScope!
|
||||
timestep: Int!
|
||||
peak: Float!
|
||||
normal: Float!
|
||||
caution: Float!
|
||||
alert: Float!
|
||||
name: String!
|
||||
unit: String!
|
||||
scope: MetricScope!
|
||||
aggregation: String
|
||||
timestep: Int!
|
||||
peak: Float!
|
||||
normal: Float!
|
||||
caution: Float!
|
||||
alert: Float!
|
||||
}
|
||||
|
||||
type Tag {
|
||||
@@ -118,8 +125,13 @@ type StatsSeries {
|
||||
}
|
||||
|
||||
type MetricFootprints {
|
||||
name: String!
|
||||
footprints: [NullableFloat!]!
|
||||
metric: String!
|
||||
data: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type Footprints {
|
||||
nodehours: [NullableFloat!]!
|
||||
metrics: [MetricFootprints!]!
|
||||
}
|
||||
|
||||
enum Aggregate { USER, PROJECT, CLUSTER }
|
||||
@@ -134,13 +146,21 @@ type Count {
|
||||
count: Int!
|
||||
}
|
||||
|
||||
type User {
|
||||
username: String!
|
||||
name: String!
|
||||
email: String!
|
||||
}
|
||||
|
||||
type Query {
|
||||
clusters: [Cluster!]! # List of all clusters
|
||||
tags: [Tag!]! # List of all tags
|
||||
|
||||
user(username: String!): User
|
||||
|
||||
job(id: ID!): Job
|
||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): [MetricFootprints]!
|
||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
||||
|
||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
||||
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]!
|
||||
|
@@ -18,14 +18,22 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/schema"
|
||||
)
|
||||
|
||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
|
||||
return r.Repo.FetchMetadata(obj)
|
||||
func (r *clusterResolver) Partitions(ctx context.Context, obj *model.Cluster) ([]string, error) {
|
||||
return r.Repo.Partitions(obj.Name)
|
||||
}
|
||||
|
||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||
return r.Repo.GetTags(&obj.ID)
|
||||
}
|
||||
|
||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
|
||||
return r.Repo.FetchMetadata(obj)
|
||||
}
|
||||
|
||||
func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.User, error) {
|
||||
return auth.FetchUser(ctx, r.DB, obj.User)
|
||||
}
|
||||
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
|
||||
id, err := r.Repo.CreateTag(typeArg, name)
|
||||
if err != nil {
|
||||
@@ -98,6 +106,10 @@ func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
||||
return r.Repo.GetTags(nil)
|
||||
}
|
||||
|
||||
func (r *queryResolver) User(ctx context.Context, username string) (*model.User, error) {
|
||||
return auth.FetchUser(ctx, r.DB, username)
|
||||
}
|
||||
|
||||
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
|
||||
numericId, err := strconv.ParseInt(id, 10, 64)
|
||||
if err != nil {
|
||||
@@ -144,7 +156,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
|
||||
return res, err
|
||||
}
|
||||
|
||||
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.MetricFootprints, error) {
|
||||
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
||||
return r.jobsFootprints(ctx, filter, metrics)
|
||||
}
|
||||
|
||||
@@ -204,7 +216,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, partiti
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, mc := range config.GetClusterConfig(cluster).MetricConfig {
|
||||
for _, mc := range config.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
}
|
||||
}
|
||||
@@ -236,6 +248,9 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, partiti
|
||||
return nodeMetrics, nil
|
||||
}
|
||||
|
||||
// Cluster returns generated.ClusterResolver implementation.
|
||||
func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver{r} }
|
||||
|
||||
// Job returns generated.JobResolver implementation.
|
||||
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
||||
|
||||
@@ -245,6 +260,7 @@ func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResol
|
||||
// Query returns generated.QueryResolver implementation.
|
||||
func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
|
||||
|
||||
type clusterResolver struct{ *Resolver }
|
||||
type jobResolver struct{ *Resolver }
|
||||
type mutationResolver struct{ *Resolver }
|
||||
type queryResolver struct{ *Resolver }
|
||||
|
@@ -32,8 +32,8 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
|
||||
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
|
||||
for _, cluster := range config.Clusters {
|
||||
for _, partition := range cluster.Partitions {
|
||||
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", partition.SocketsPerNode, partition.CoresPerSocket)
|
||||
for _, subcluster := range cluster.SubClusters {
|
||||
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", subcluster.SocketsPerNode, subcluster.CoresPerSocket)
|
||||
var query sq.SelectBuilder
|
||||
if groupBy == nil {
|
||||
query = sq.Select(
|
||||
@@ -54,7 +54,7 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
|
||||
query = query.
|
||||
Where("job.cluster = ?", cluster.Name).
|
||||
Where("job.partition = ?", partition.Name)
|
||||
Where("job.subcluster = ?", subcluster.Name)
|
||||
|
||||
query = repository.SecurityCheck(ctx, query)
|
||||
for _, f := range filter {
|
||||
@@ -254,7 +254,7 @@ func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilte
|
||||
}
|
||||
|
||||
// Helper function for the jobsFootprints GraphQL query placed here so that schema.resolvers.go is not too full.
|
||||
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.MetricFootprints, error) {
|
||||
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
||||
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -268,19 +268,25 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
|
||||
avgs[i] = make([]schema.Float, 0, len(jobs))
|
||||
}
|
||||
|
||||
nodehours := make([]schema.Float, 0, len(jobs))
|
||||
for _, job := range jobs {
|
||||
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
nodehours = append(nodehours, schema.Float(float64(job.Duration)/60.0*float64(job.NumNodes)))
|
||||
}
|
||||
|
||||
res := make([]*model.MetricFootprints, len(avgs))
|
||||
for i, arr := range avgs {
|
||||
res[i] = &model.MetricFootprints{
|
||||
Name: metrics[i],
|
||||
Footprints: arr,
|
||||
Metric: metrics[i],
|
||||
Data: arr,
|
||||
}
|
||||
}
|
||||
|
||||
return res, nil
|
||||
return &model.Footprints{
|
||||
Nodehours: nodehours,
|
||||
Metrics: res,
|
||||
}, nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user