mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-12-14 03:16:15 +01:00
Add more information to status dash
This commit is contained in:
@@ -237,10 +237,12 @@ enum Aggregate {
|
|||||||
USER
|
USER
|
||||||
PROJECT
|
PROJECT
|
||||||
CLUSTER
|
CLUSTER
|
||||||
|
SUBCLUSTER
|
||||||
}
|
}
|
||||||
enum SortByAggregate {
|
enum SortByAggregate {
|
||||||
TOTALWALLTIME
|
TOTALWALLTIME
|
||||||
TOTALJOBS
|
TOTALJOBS
|
||||||
|
TOTALUSERS
|
||||||
TOTALNODES
|
TOTALNODES
|
||||||
TOTALNODEHOURS
|
TOTALNODEHOURS
|
||||||
TOTALCORES
|
TOTALCORES
|
||||||
@@ -501,11 +503,12 @@ type MetricHistoPoint {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type JobsStatistics {
|
type JobsStatistics {
|
||||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
id: ID! # If `groupBy` was used, ID of the user/project/cluster/subcluster
|
||||||
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
||||||
|
totalUsers: Int! # if *not* User-Statistics: Number of active users (based on running jobs)
|
||||||
totalJobs: Int! # Number of jobs
|
totalJobs: Int! # Number of jobs
|
||||||
runningJobs: Int! # Number of running jobs
|
runningJobs: Int! # Number of running jobs
|
||||||
shortJobs: Int! # Number of jobs with a duration of less than duration
|
shortJobs: Int! # Number of jobs with a duration of less than config'd ShortRunningJobsDuration
|
||||||
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||||
totalNodes: Int! # Sum of the nodes of all matched jobs
|
totalNodes: Int! # Sum of the nodes of all matched jobs
|
||||||
totalNodeHours: Int! # Sum of the node hours of all matched jobs
|
totalNodeHours: Int! # Sum of the node hours of all matched jobs
|
||||||
|
|||||||
@@ -201,6 +201,7 @@ type ComplexityRoot struct {
|
|||||||
TotalJobs func(childComplexity int) int
|
TotalJobs func(childComplexity int) int
|
||||||
TotalNodeHours func(childComplexity int) int
|
TotalNodeHours func(childComplexity int) int
|
||||||
TotalNodes func(childComplexity int) int
|
TotalNodes func(childComplexity int) int
|
||||||
|
TotalUsers func(childComplexity int) int
|
||||||
TotalWalltime func(childComplexity int) int
|
TotalWalltime func(childComplexity int) int
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1166,6 +1167,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
|
|||||||
|
|
||||||
return e.complexity.JobsStatistics.TotalNodes(childComplexity), true
|
return e.complexity.JobsStatistics.TotalNodes(childComplexity), true
|
||||||
|
|
||||||
|
case "JobsStatistics.totalUsers":
|
||||||
|
if e.complexity.JobsStatistics.TotalUsers == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
return e.complexity.JobsStatistics.TotalUsers(childComplexity), true
|
||||||
|
|
||||||
case "JobsStatistics.totalWalltime":
|
case "JobsStatistics.totalWalltime":
|
||||||
if e.complexity.JobsStatistics.TotalWalltime == nil {
|
if e.complexity.JobsStatistics.TotalWalltime == nil {
|
||||||
break
|
break
|
||||||
@@ -2567,10 +2575,12 @@ enum Aggregate {
|
|||||||
USER
|
USER
|
||||||
PROJECT
|
PROJECT
|
||||||
CLUSTER
|
CLUSTER
|
||||||
|
SUBCLUSTER
|
||||||
}
|
}
|
||||||
enum SortByAggregate {
|
enum SortByAggregate {
|
||||||
TOTALWALLTIME
|
TOTALWALLTIME
|
||||||
TOTALJOBS
|
TOTALJOBS
|
||||||
|
TOTALUSERS
|
||||||
TOTALNODES
|
TOTALNODES
|
||||||
TOTALNODEHOURS
|
TOTALNODEHOURS
|
||||||
TOTALCORES
|
TOTALCORES
|
||||||
@@ -2831,8 +2841,9 @@ type MetricHistoPoint {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type JobsStatistics {
|
type JobsStatistics {
|
||||||
id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster
|
id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster/subcluster
|
||||||
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
||||||
|
totalUsers: Int! # if *not* User-Statistics: Number of active users (based on running jobs)
|
||||||
totalJobs: Int! # Number of jobs
|
totalJobs: Int! # Number of jobs
|
||||||
runningJobs: Int! # Number of running jobs
|
runningJobs: Int! # Number of running jobs
|
||||||
shortJobs: Int! # Number of jobs with a duration of less than duration
|
shortJobs: Int! # Number of jobs with a duration of less than duration
|
||||||
@@ -8334,6 +8345,50 @@ func (ec *executionContext) fieldContext_JobsStatistics_name(_ context.Context,
|
|||||||
return fc, nil
|
return fc, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ec *executionContext) _JobsStatistics_totalUsers(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
|
||||||
|
fc, err := ec.fieldContext_JobsStatistics_totalUsers(ctx, field)
|
||||||
|
if err != nil {
|
||||||
|
return graphql.Null
|
||||||
|
}
|
||||||
|
ctx = graphql.WithFieldContext(ctx, fc)
|
||||||
|
defer func() {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
ec.Error(ctx, ec.Recover(ctx, r))
|
||||||
|
ret = graphql.Null
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
|
||||||
|
ctx = rctx // use context from middleware stack in children
|
||||||
|
return obj.TotalUsers, nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
ec.Error(ctx, err)
|
||||||
|
return graphql.Null
|
||||||
|
}
|
||||||
|
if resTmp == nil {
|
||||||
|
if !graphql.HasFieldError(ctx, fc) {
|
||||||
|
ec.Errorf(ctx, "must not be null")
|
||||||
|
}
|
||||||
|
return graphql.Null
|
||||||
|
}
|
||||||
|
res := resTmp.(int)
|
||||||
|
fc.Result = res
|
||||||
|
return ec.marshalNInt2int(ctx, field.Selections, res)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ec *executionContext) fieldContext_JobsStatistics_totalUsers(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
|
||||||
|
fc = &graphql.FieldContext{
|
||||||
|
Object: "JobsStatistics",
|
||||||
|
Field: field,
|
||||||
|
IsMethod: false,
|
||||||
|
IsResolver: false,
|
||||||
|
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
|
||||||
|
return nil, errors.New("field of type Int does not have child fields")
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return fc, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (ec *executionContext) _JobsStatistics_totalJobs(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
|
func (ec *executionContext) _JobsStatistics_totalJobs(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) {
|
||||||
fc, err := ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
|
fc, err := ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -12636,6 +12691,8 @@ func (ec *executionContext) fieldContext_Query_jobsStatistics(ctx context.Contex
|
|||||||
return ec.fieldContext_JobsStatistics_id(ctx, field)
|
return ec.fieldContext_JobsStatistics_id(ctx, field)
|
||||||
case "name":
|
case "name":
|
||||||
return ec.fieldContext_JobsStatistics_name(ctx, field)
|
return ec.fieldContext_JobsStatistics_name(ctx, field)
|
||||||
|
case "totalUsers":
|
||||||
|
return ec.fieldContext_JobsStatistics_totalUsers(ctx, field)
|
||||||
case "totalJobs":
|
case "totalJobs":
|
||||||
return ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
|
return ec.fieldContext_JobsStatistics_totalJobs(ctx, field)
|
||||||
case "runningJobs":
|
case "runningJobs":
|
||||||
@@ -19240,6 +19297,11 @@ func (ec *executionContext) _JobsStatistics(ctx context.Context, sel ast.Selecti
|
|||||||
if out.Values[i] == graphql.Null {
|
if out.Values[i] == graphql.Null {
|
||||||
out.Invalids++
|
out.Invalids++
|
||||||
}
|
}
|
||||||
|
case "totalUsers":
|
||||||
|
out.Values[i] = ec._JobsStatistics_totalUsers(ctx, field, obj)
|
||||||
|
if out.Values[i] == graphql.Null {
|
||||||
|
out.Invalids++
|
||||||
|
}
|
||||||
case "totalJobs":
|
case "totalJobs":
|
||||||
out.Values[i] = ec._JobsStatistics_totalJobs(ctx, field, obj)
|
out.Values[i] = ec._JobsStatistics_totalJobs(ctx, field, obj)
|
||||||
if out.Values[i] == graphql.Null {
|
if out.Values[i] == graphql.Null {
|
||||||
|
|||||||
@@ -112,6 +112,7 @@ type JobStats struct {
|
|||||||
type JobsStatistics struct {
|
type JobsStatistics struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
TotalUsers int `json:"totalUsers"`
|
||||||
TotalJobs int `json:"totalJobs"`
|
TotalJobs int `json:"totalJobs"`
|
||||||
RunningJobs int `json:"runningJobs"`
|
RunningJobs int `json:"runningJobs"`
|
||||||
ShortJobs int `json:"shortJobs"`
|
ShortJobs int `json:"shortJobs"`
|
||||||
@@ -247,20 +248,22 @@ type User struct {
|
|||||||
type Aggregate string
|
type Aggregate string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
AggregateUser Aggregate = "USER"
|
AggregateUser Aggregate = "USER"
|
||||||
AggregateProject Aggregate = "PROJECT"
|
AggregateProject Aggregate = "PROJECT"
|
||||||
AggregateCluster Aggregate = "CLUSTER"
|
AggregateCluster Aggregate = "CLUSTER"
|
||||||
|
AggregateSubcluster Aggregate = "SUBCLUSTER"
|
||||||
)
|
)
|
||||||
|
|
||||||
var AllAggregate = []Aggregate{
|
var AllAggregate = []Aggregate{
|
||||||
AggregateUser,
|
AggregateUser,
|
||||||
AggregateProject,
|
AggregateProject,
|
||||||
AggregateCluster,
|
AggregateCluster,
|
||||||
|
AggregateSubcluster,
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e Aggregate) IsValid() bool {
|
func (e Aggregate) IsValid() bool {
|
||||||
switch e {
|
switch e {
|
||||||
case AggregateUser, AggregateProject, AggregateCluster:
|
case AggregateUser, AggregateProject, AggregateCluster, AggregateSubcluster:
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@@ -292,6 +295,7 @@ type SortByAggregate string
|
|||||||
const (
|
const (
|
||||||
SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME"
|
SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME"
|
||||||
SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS"
|
SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS"
|
||||||
|
SortByAggregateTotalusers SortByAggregate = "TOTALUSERS"
|
||||||
SortByAggregateTotalnodes SortByAggregate = "TOTALNODES"
|
SortByAggregateTotalnodes SortByAggregate = "TOTALNODES"
|
||||||
SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS"
|
SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS"
|
||||||
SortByAggregateTotalcores SortByAggregate = "TOTALCORES"
|
SortByAggregateTotalcores SortByAggregate = "TOTALCORES"
|
||||||
@@ -303,6 +307,7 @@ const (
|
|||||||
var AllSortByAggregate = []SortByAggregate{
|
var AllSortByAggregate = []SortByAggregate{
|
||||||
SortByAggregateTotalwalltime,
|
SortByAggregateTotalwalltime,
|
||||||
SortByAggregateTotaljobs,
|
SortByAggregateTotaljobs,
|
||||||
|
SortByAggregateTotalusers,
|
||||||
SortByAggregateTotalnodes,
|
SortByAggregateTotalnodes,
|
||||||
SortByAggregateTotalnodehours,
|
SortByAggregateTotalnodehours,
|
||||||
SortByAggregateTotalcores,
|
SortByAggregateTotalcores,
|
||||||
@@ -313,7 +318,7 @@ var AllSortByAggregate = []SortByAggregate{
|
|||||||
|
|
||||||
func (e SortByAggregate) IsValid() bool {
|
func (e SortByAggregate) IsValid() bool {
|
||||||
switch e {
|
switch e {
|
||||||
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
|
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalusers, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|||||||
@@ -581,7 +581,7 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
defaultDurationBins := "1h"
|
defaultDurationBins := "1h"
|
||||||
defaultMetricBins := 10
|
defaultMetricBins := 10
|
||||||
|
|
||||||
if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
|
if requireField(ctx, "totalJobs") || requireField(ctx, "totalUsers") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
|
||||||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
|
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
|
||||||
if groupBy == nil {
|
if groupBy == nil {
|
||||||
stats, err = r.Repo.JobsStats(ctx, filter)
|
stats, err = r.Repo.JobsStats(ctx, filter)
|
||||||
|
|||||||
@@ -21,13 +21,15 @@ import (
|
|||||||
|
|
||||||
// GraphQL validation should make sure that no unknown values can be specified.
|
// GraphQL validation should make sure that no unknown values can be specified.
|
||||||
var groupBy2column = map[model.Aggregate]string{
|
var groupBy2column = map[model.Aggregate]string{
|
||||||
model.AggregateUser: "job.hpc_user",
|
model.AggregateUser: "job.hpc_user",
|
||||||
model.AggregateProject: "job.project",
|
model.AggregateProject: "job.project",
|
||||||
model.AggregateCluster: "job.cluster",
|
model.AggregateCluster: "job.cluster",
|
||||||
|
model.AggregateSubcluster: "job.subcluster",
|
||||||
}
|
}
|
||||||
|
|
||||||
var sortBy2column = map[model.SortByAggregate]string{
|
var sortBy2column = map[model.SortByAggregate]string{
|
||||||
model.SortByAggregateTotaljobs: "totalJobs",
|
model.SortByAggregateTotaljobs: "totalJobs",
|
||||||
|
model.SortByAggregateTotalusers: "totalUsers",
|
||||||
model.SortByAggregateTotalwalltime: "totalWalltime",
|
model.SortByAggregateTotalwalltime: "totalWalltime",
|
||||||
model.SortByAggregateTotalnodes: "totalNodes",
|
model.SortByAggregateTotalnodes: "totalNodes",
|
||||||
model.SortByAggregateTotalnodehours: "totalNodeHours",
|
model.SortByAggregateTotalnodehours: "totalNodeHours",
|
||||||
@@ -76,8 +78,12 @@ func (r *JobRepository) buildStatsQuery(
|
|||||||
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
||||||
|
|
||||||
if col != "" {
|
if col != "" {
|
||||||
// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
// Scan columns: id, name, totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||||
query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
|
query = sq.Select(
|
||||||
|
col,
|
||||||
|
"name",
|
||||||
|
"COUNT(job.id) as totalJobs",
|
||||||
|
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
|
||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
|
||||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
|
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
|
||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
|
||||||
@@ -87,8 +93,10 @@ func (r *JobRepository) buildStatsQuery(
|
|||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
|
||||||
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
|
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
|
||||||
} else {
|
} else {
|
||||||
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
// Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||||
query = sq.Select("COUNT(job.id)",
|
query = sq.Select(
|
||||||
|
"COUNT(job.id) as totalJobs",
|
||||||
|
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
|
||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
|
||||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
|
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
|
||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType),
|
||||||
@@ -167,14 +175,14 @@ func (r *JobRepository) JobsStatsGrouped(
|
|||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
var id sql.NullString
|
var id sql.NullString
|
||||||
var name sql.NullString
|
var name sql.NullString
|
||||||
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
|
var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
|
||||||
if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
if err := rows.Scan(&id, &name, &jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
||||||
cclog.Warn("Error while scanning rows")
|
cclog.Warn("Error while scanning rows")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if id.Valid {
|
if id.Valid {
|
||||||
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
|
var totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
|
||||||
var personName string
|
var personName string
|
||||||
|
|
||||||
if name.Valid {
|
if name.Valid {
|
||||||
@@ -185,6 +193,10 @@ func (r *JobRepository) JobsStatsGrouped(
|
|||||||
totalJobs = int(jobs.Int64)
|
totalJobs = int(jobs.Int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if users.Valid {
|
||||||
|
totalUsers = int(users.Int64)
|
||||||
|
}
|
||||||
|
|
||||||
if walltime.Valid {
|
if walltime.Valid {
|
||||||
totalWalltime = int(walltime.Int64)
|
totalWalltime = int(walltime.Int64)
|
||||||
}
|
}
|
||||||
@@ -228,8 +240,9 @@ func (r *JobRepository) JobsStatsGrouped(
|
|||||||
stats = append(stats,
|
stats = append(stats,
|
||||||
&model.JobsStatistics{
|
&model.JobsStatistics{
|
||||||
ID: id.String,
|
ID: id.String,
|
||||||
TotalJobs: int(jobs.Int64),
|
TotalJobs: totalJobs,
|
||||||
TotalWalltime: int(walltime.Int64),
|
TotalUsers: totalUsers,
|
||||||
|
TotalWalltime: totalWalltime,
|
||||||
TotalNodes: totalNodes,
|
TotalNodes: totalNodes,
|
||||||
TotalNodeHours: totalNodeHours,
|
TotalNodeHours: totalNodeHours,
|
||||||
TotalCores: totalCores,
|
TotalCores: totalCores,
|
||||||
@@ -259,8 +272,8 @@ func (r *JobRepository) JobsStats(
|
|||||||
row := query.RunWith(r.DB).QueryRow()
|
row := query.RunWith(r.DB).QueryRow()
|
||||||
stats := make([]*model.JobsStatistics, 0, 1)
|
stats := make([]*model.JobsStatistics, 0, 1)
|
||||||
|
|
||||||
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
|
var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
|
||||||
if err := row.Scan(&jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
if err := row.Scan(&jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
||||||
cclog.Warn("Error while scanning rows")
|
cclog.Warn("Error while scanning rows")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -280,6 +293,7 @@ func (r *JobRepository) JobsStats(
|
|||||||
stats = append(stats,
|
stats = append(stats,
|
||||||
&model.JobsStatistics{
|
&model.JobsStatistics{
|
||||||
TotalJobs: int(jobs.Int64),
|
TotalJobs: int(jobs.Int64),
|
||||||
|
TotalUsers: int(users.Int64),
|
||||||
TotalWalltime: int(walltime.Int64),
|
TotalWalltime: int(walltime.Int64),
|
||||||
TotalNodeHours: totalNodeHours,
|
TotalNodeHours: totalNodeHours,
|
||||||
TotalCoreHours: totalCoreHours,
|
TotalCoreHours: totalCoreHours,
|
||||||
|
|||||||
@@ -45,12 +45,17 @@
|
|||||||
let plotWidths = $state([]);
|
let plotWidths = $state([]);
|
||||||
// Bar Gauges
|
// Bar Gauges
|
||||||
let allocatedNodes = $state({});
|
let allocatedNodes = $state({});
|
||||||
|
let allocatedAccs = $state({});
|
||||||
let flopRate = $state({});
|
let flopRate = $state({});
|
||||||
let flopRateUnitPrefix = $state({});
|
let flopRateUnitPrefix = $state({});
|
||||||
let flopRateUnitBase = $state({});
|
let flopRateUnitBase = $state({});
|
||||||
let memBwRate = $state({});
|
let memBwRate = $state({});
|
||||||
let memBwRateUnitPrefix = $state({});
|
let memBwRateUnitPrefix = $state({});
|
||||||
let memBwRateUnitBase = $state({});
|
let memBwRateUnitBase = $state({});
|
||||||
|
// Plain Infos
|
||||||
|
let runningJobs = $state({});
|
||||||
|
let activeUsers = $state({});
|
||||||
|
let totalAccs = $state({});
|
||||||
|
|
||||||
/* Derived */
|
/* Derived */
|
||||||
// Note: nodeMetrics are requested on configured $timestep resolution
|
// Note: nodeMetrics are requested on configured $timestep resolution
|
||||||
@@ -63,6 +68,8 @@
|
|||||||
$metrics: [String!]
|
$metrics: [String!]
|
||||||
$from: Time!
|
$from: Time!
|
||||||
$to: Time!
|
$to: Time!
|
||||||
|
$filter: [JobFilter!]!
|
||||||
|
$paging: PageRequest!
|
||||||
) {
|
) {
|
||||||
nodeMetrics(
|
nodeMetrics(
|
||||||
cluster: $cluster
|
cluster: $cluster
|
||||||
@@ -87,11 +94,23 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
# Only counts shared nodes once
|
||||||
allocatedNodes(cluster: $cluster) {
|
allocatedNodes(cluster: $cluster) {
|
||||||
name
|
name
|
||||||
count
|
count
|
||||||
}
|
}
|
||||||
|
# totalNodes includes multiples if shared jobs
|
||||||
|
jobsStatistics(
|
||||||
|
filter: $filter
|
||||||
|
page: $paging
|
||||||
|
sortBy: TOTALJOBS
|
||||||
|
groupBy: SUBCLUSTER
|
||||||
|
) {
|
||||||
|
id
|
||||||
|
totalJobs
|
||||||
|
totalUsers
|
||||||
|
totalAccs
|
||||||
|
}
|
||||||
}
|
}
|
||||||
`,
|
`,
|
||||||
variables: {
|
variables: {
|
||||||
@@ -99,7 +118,8 @@
|
|||||||
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
|
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
|
||||||
from: from.toISOString(),
|
from: from.toISOString(),
|
||||||
to: to.toISOString(),
|
to: to.toISOString(),
|
||||||
// filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
|
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
|
||||||
|
paging: { itemsPerPage: -1, page: 1 }, // Get all: -1
|
||||||
},
|
},
|
||||||
}));
|
}));
|
||||||
|
|
||||||
@@ -110,10 +130,27 @@
|
|||||||
(c) => c.name == cluster,
|
(c) => c.name == cluster,
|
||||||
).subClusters;
|
).subClusters;
|
||||||
for (let subCluster of subClusters) {
|
for (let subCluster of subClusters) {
|
||||||
|
// Allocations
|
||||||
allocatedNodes[subCluster.name] =
|
allocatedNodes[subCluster.name] =
|
||||||
$statusQuery.data.allocatedNodes.find(
|
$statusQuery.data.allocatedNodes.find(
|
||||||
({ name }) => name == subCluster.name,
|
({ name }) => name == subCluster.name,
|
||||||
)?.count || 0;
|
)?.count || 0;
|
||||||
|
allocatedAccs[subCluster.name] =
|
||||||
|
$statusQuery.data.jobsStatistics.find(
|
||||||
|
({ id }) => id == subCluster.name,
|
||||||
|
)?.totalAccs || 0;
|
||||||
|
// Infos
|
||||||
|
activeUsers[subCluster.name] =
|
||||||
|
$statusQuery.data.jobsStatistics.find(
|
||||||
|
({ id }) => id == subCluster.name,
|
||||||
|
)?.totalUsers || 0;
|
||||||
|
runningJobs[subCluster.name] =
|
||||||
|
$statusQuery.data.jobsStatistics.find(
|
||||||
|
({ id }) => id == subCluster.name,
|
||||||
|
)?.totalJobs || 0;
|
||||||
|
totalAccs[subCluster.name] =
|
||||||
|
(subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || null;
|
||||||
|
// Keymetrics
|
||||||
flopRate[subCluster.name] =
|
flopRate[subCluster.name] =
|
||||||
Math.floor(
|
Math.floor(
|
||||||
sumUp($statusQuery.data.nodeMetrics, subCluster.name, "flops_any") *
|
sumUp($statusQuery.data.nodeMetrics, subCluster.name, "flops_any") *
|
||||||
@@ -158,9 +195,15 @@
|
|||||||
<Card class="h-auto mt-1">
|
<Card class="h-auto mt-1">
|
||||||
<CardHeader>
|
<CardHeader>
|
||||||
<CardTitle class="mb-0">SubCluster "{subCluster.name}"</CardTitle>
|
<CardTitle class="mb-0">SubCluster "{subCluster.name}"</CardTitle>
|
||||||
|
<span>{subCluster.processorType}</span>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardBody>
|
<CardBody>
|
||||||
<Table borderless>
|
<Table borderless>
|
||||||
|
<tr class="py-2">
|
||||||
|
<td style="font-size:x-large;">{runningJobs[subCluster.name]} Running Jobs</td>
|
||||||
|
<td colspan="2" style="font-size:x-large;">{activeUsers[subCluster.name]} Active Users</td>
|
||||||
|
</tr>
|
||||||
|
<hr class="my-1"/>
|
||||||
<tr class="py-2">
|
<tr class="py-2">
|
||||||
<th scope="col">Allocated Nodes</th>
|
<th scope="col">Allocated Nodes</th>
|
||||||
<td style="min-width: 100px;"
|
<td style="min-width: 100px;"
|
||||||
@@ -176,6 +219,23 @@
|
|||||||
Nodes</td
|
Nodes</td
|
||||||
>
|
>
|
||||||
</tr>
|
</tr>
|
||||||
|
{#if totalAccs[subCluster.name] !== null}
|
||||||
|
<tr class="py-2">
|
||||||
|
<th scope="col">Allocated Accelerators</th>
|
||||||
|
<td style="min-width: 100px;"
|
||||||
|
><div class="col">
|
||||||
|
<Progress
|
||||||
|
value={allocatedAccs[subCluster.name]}
|
||||||
|
max={totalAccs[subCluster.name]}
|
||||||
|
/>
|
||||||
|
</div></td
|
||||||
|
>
|
||||||
|
<td
|
||||||
|
>{allocatedAccs[subCluster.name]} / {totalAccs[subCluster.name]}
|
||||||
|
Accelerators</td
|
||||||
|
>
|
||||||
|
</tr>
|
||||||
|
{/if}
|
||||||
<tr class="py-2">
|
<tr class="py-2">
|
||||||
<th scope="col"
|
<th scope="col"
|
||||||
>Flop Rate (Any) <Icon
|
>Flop Rate (Any) <Icon
|
||||||
|
|||||||
Reference in New Issue
Block a user