Merge pull request #205 from ClusterCockpit/166_add_scopes_analysis

166 add scopes analysis
This commit is contained in:
Jan Eitzinger 2023-08-31 12:03:37 +02:00 committed by GitHub
commit b836eee1e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 1295 additions and 534 deletions

View File

@ -156,12 +156,18 @@ type MetricFootprints {
} }
type Footprints { type Footprints {
nodehours: [NullableFloat!]! timeWeights: TimeWeights!
metrics: [MetricFootprints!]! metrics: [MetricFootprints!]!
} }
type TimeWeights {
nodeHours: [NullableFloat!]!
accHours: [NullableFloat!]!
coreHours: [NullableFloat!]!
}
enum Aggregate { USER, PROJECT, CLUSTER } enum Aggregate { USER, PROJECT, CLUSTER }
enum Weights { NODE_COUNT, NODE_HOURS } enum SortByAggregate { TOTALWALLTIME, TOTALJOBS, TOTALNODES, TOTALNODEHOURS, TOTALCORES, TOTALCOREHOURS, TOTALACCS, TOTALACCHOURS }
type NodeMetrics { type NodeMetrics {
host: String! host: String!
@ -192,8 +198,7 @@ type Query {
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList! jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]! jobsStatistics(filter: [JobFilter!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate): [JobsStatistics!]!
jobsCount(filter: [JobFilter]!, groupBy: Aggregate!, weight: Weights, limit: Int): [Count!]!
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]! rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
@ -288,11 +293,16 @@ type JobsStatistics {
runningJobs: Int! # Number of running jobs runningJobs: Int! # Number of running jobs
shortJobs: Int! # Number of jobs with a duration of less than duration shortJobs: Int! # Number of jobs with a duration of less than duration
totalWalltime: Int! # Sum of the duration of all matched jobs in hours totalWalltime: Int! # Sum of the duration of all matched jobs in hours
totalNodes: Int! # Sum of the nodes of all matched jobs
totalNodeHours: Int! # Sum of the node hours of all matched jobs totalNodeHours: Int! # Sum of the node hours of all matched jobs
totalCores: Int! # Sum of the cores of all matched jobs
totalCoreHours: Int! # Sum of the core hours of all matched jobs totalCoreHours: Int! # Sum of the core hours of all matched jobs
totalAccs: Int! # Sum of the accs of all matched jobs
totalAccHours: Int! # Sum of the gpu hours of all matched jobs totalAccHours: Int! # Sum of the gpu hours of all matched jobs
histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
} }
input PageRequest { input PageRequest {

View File

@ -26,21 +26,25 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
StopJobsExceedingWalltime: 0, StopJobsExceedingWalltime: 0,
ShortRunningJobsDuration: 5 * 60, ShortRunningJobsDuration: 5 * 60,
UiDefaults: map[string]interface{}{ UiDefaults: map[string]interface{}{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}}, "analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"plot_general_colorBackground": true, "plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"}, "plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3, "plot_general_lineWidth": 3,
"plot_list_jobsPerPage": 50, "plot_list_jobsPerPage": 50,
"plot_list_selectedMetrics": []string{"cpu_load", "mem_used", "flops_any", "mem_bw"}, "plot_list_selectedMetrics": []string{"cpu_load", "mem_used", "flops_any", "mem_bw"},
"plot_view_plotsPerRow": 3, "plot_view_plotsPerRow": 3,
"plot_view_showPolarplot": true, "plot_view_showPolarplot": true,
"plot_view_showRoofline": true, "plot_view_showRoofline": true,
"plot_view_showStatTable": true, "plot_view_showStatTable": true,
"system_view_selectedMetric": "cpu_load", "system_view_selectedMetric": "cpu_load",
"analysis_view_selectedTopEntity": "user",
"analysis_view_selectedTopCategory": "totalWalltime",
"status_view_selectedTopUserCategory": "totalJobs",
"status_view_selectedTopProjectCategory": "totalJobs",
}, },
} }

File diff suppressed because it is too large Load Diff

View File

@ -22,8 +22,8 @@ type FloatRange struct {
} }
type Footprints struct { type Footprints struct {
Nodehours []schema.Float `json:"nodehours"` TimeWeights *TimeWeights `json:"timeWeights"`
Metrics []*MetricFootprints `json:"metrics"` Metrics []*MetricFootprints `json:"metrics"`
} }
type HistoPoint struct { type HistoPoint struct {
@ -91,11 +91,16 @@ type JobsStatistics struct {
RunningJobs int `json:"runningJobs"` RunningJobs int `json:"runningJobs"`
ShortJobs int `json:"shortJobs"` ShortJobs int `json:"shortJobs"`
TotalWalltime int `json:"totalWalltime"` TotalWalltime int `json:"totalWalltime"`
TotalNodes int `json:"totalNodes"`
TotalNodeHours int `json:"totalNodeHours"` TotalNodeHours int `json:"totalNodeHours"`
TotalCores int `json:"totalCores"`
TotalCoreHours int `json:"totalCoreHours"` TotalCoreHours int `json:"totalCoreHours"`
TotalAccs int `json:"totalAccs"`
TotalAccHours int `json:"totalAccHours"` TotalAccHours int `json:"totalAccHours"`
HistDuration []*HistoPoint `json:"histDuration"` HistDuration []*HistoPoint `json:"histDuration"`
HistNumNodes []*HistoPoint `json:"histNumNodes"` HistNumNodes []*HistoPoint `json:"histNumNodes"`
HistNumCores []*HistoPoint `json:"histNumCores"`
HistNumAccs []*HistoPoint `json:"histNumAccs"`
} }
type MetricFootprints struct { type MetricFootprints struct {
@ -133,6 +138,12 @@ type TimeRangeOutput struct {
To time.Time `json:"to"` To time.Time `json:"to"`
} }
// TimeWeights carries per-job weighting vectors used when averaging metric
// footprints: one entry per matched job, expressed in node-, accelerator-
// and core-hours respectively (parallel to the jobs slice that built them).
type TimeWeights struct {
	NodeHours []schema.Float `json:"nodeHours"`
	AccHours  []schema.Float `json:"accHours"`
	CoreHours []schema.Float `json:"coreHours"`
}
type User struct { type User struct {
Username string `json:"username"` Username string `json:"username"`
Name string `json:"name"` Name string `json:"name"`
@ -182,6 +193,59 @@ func (e Aggregate) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String())) fmt.Fprint(w, strconv.Quote(e.String()))
} }
type SortByAggregate string
const (
SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME"
SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS"
SortByAggregateTotalnodes SortByAggregate = "TOTALNODES"
SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS"
SortByAggregateTotalcores SortByAggregate = "TOTALCORES"
SortByAggregateTotalcorehours SortByAggregate = "TOTALCOREHOURS"
SortByAggregateTotalaccs SortByAggregate = "TOTALACCS"
SortByAggregateTotalacchours SortByAggregate = "TOTALACCHOURS"
)
var AllSortByAggregate = []SortByAggregate{
SortByAggregateTotalwalltime,
SortByAggregateTotaljobs,
SortByAggregateTotalnodes,
SortByAggregateTotalnodehours,
SortByAggregateTotalcores,
SortByAggregateTotalcorehours,
SortByAggregateTotalaccs,
SortByAggregateTotalacchours,
}
func (e SortByAggregate) IsValid() bool {
switch e {
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
return true
}
return false
}
func (e SortByAggregate) String() string {
return string(e)
}
func (e *SortByAggregate) UnmarshalGQL(v interface{}) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
}
*e = SortByAggregate(str)
if !e.IsValid() {
return fmt.Errorf("%s is not a valid SortByAggregate", str)
}
return nil
}
func (e SortByAggregate) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}
type SortDirectionEnum string type SortDirectionEnum string
const ( const (
@ -222,44 +286,3 @@ func (e *SortDirectionEnum) UnmarshalGQL(v interface{}) error {
func (e SortDirectionEnum) MarshalGQL(w io.Writer) { func (e SortDirectionEnum) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String())) fmt.Fprint(w, strconv.Quote(e.String()))
} }
type Weights string
const (
WeightsNodeCount Weights = "NODE_COUNT"
WeightsNodeHours Weights = "NODE_HOURS"
)
var AllWeights = []Weights{
WeightsNodeCount,
WeightsNodeHours,
}
func (e Weights) IsValid() bool {
switch e {
case WeightsNodeCount, WeightsNodeHours:
return true
}
return false
}
func (e Weights) String() string {
return string(e)
}
func (e *Weights) UnmarshalGQL(v interface{}) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
}
*e = Weights(str)
if !e.IsValid() {
return fmt.Errorf("%s is not a valid Weights", str)
}
return nil
}
func (e Weights) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}

View File

@ -244,34 +244,34 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
} }
// JobsStatistics is the resolver for the jobsStatistics field. // JobsStatistics is the resolver for the jobsStatistics field.
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) { func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
var err error var err error
var stats []*model.JobsStatistics var stats []*model.JobsStatistics
if requireField(ctx, "totalJobs") { if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
if groupBy == nil { if groupBy == nil {
stats, err = r.Repo.JobsStats(ctx, filter) stats, err = r.Repo.JobsStats(ctx, filter)
} else { } else {
stats, err = r.Repo.JobsStatsGrouped(ctx, filter, groupBy) stats, err = r.Repo.JobsStatsGrouped(ctx, filter, page, sortBy, groupBy)
} }
} else { } else {
stats = make([]*model.JobsStatistics, 0, 1) stats = make([]*model.JobsStatistics, 0, 1)
stats = append(stats, stats = append(stats, &model.JobsStatistics{})
&model.JobsStatistics{})
} }
if groupBy != nil { if groupBy != nil {
if requireField(ctx, "shortJobs") { if requireField(ctx, "shortJobs") {
stats, err = r.Repo.AddJobCountGrouped(ctx, filter, groupBy, stats, "short") stats, err = r.Repo.AddJobCountGrouped(ctx, filter, groupBy, stats, "short")
} }
if requireField(ctx, "RunningJobs") { if requireField(ctx, "runningJobs") {
stats, err = r.Repo.AddJobCountGrouped(ctx, filter, groupBy, stats, "running") stats, err = r.Repo.AddJobCountGrouped(ctx, filter, groupBy, stats, "running")
} }
} else { } else {
if requireField(ctx, "shortJobs") { if requireField(ctx, "shortJobs") {
stats, err = r.Repo.AddJobCount(ctx, filter, stats, "short") stats, err = r.Repo.AddJobCount(ctx, filter, stats, "short")
} }
if requireField(ctx, "RunningJobs") { if requireField(ctx, "runningJobs") {
stats, err = r.Repo.AddJobCount(ctx, filter, stats, "running") stats, err = r.Repo.AddJobCount(ctx, filter, stats, "running")
} }
} }
@ -280,7 +280,7 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
return nil, err return nil, err
} }
if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") { if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {
if groupBy == nil { if groupBy == nil {
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0]) stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0])
if err != nil { if err != nil {
@ -294,24 +294,6 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
return stats, nil return stats, nil
} }
// JobsCount is the resolver for the jobsCount field. It counts jobs matching
// the filters grouped by the given aggregate, optionally weighted and capped
// at limit groups, and converts the name→count map into a Count list.
func (r *queryResolver) JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, weight *model.Weights, limit *int) ([]*model.Count, error) {
	counts, err := r.Repo.CountGroupedJobs(ctx, groupBy, filter, weight, limit)
	if err != nil {
		log.Warn("Error while counting grouped jobs")
		return nil, err
	}

	result := make([]*model.Count, 0, len(counts))
	for group, n := range counts {
		entry := &model.Count{
			Name:  group,
			Count: n,
		}
		result = append(result, entry)
	}
	return result, nil
}
// RooflineHeatmap is the resolver for the rooflineHeatmap field. // RooflineHeatmap is the resolver for the rooflineHeatmap field.
func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) { func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY) return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)

View File

@ -15,6 +15,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
) )
const MAX_JOBS_FOR_ANALYSIS = 500 const MAX_JOBS_FOR_ANALYSIS = 500
@ -106,7 +107,11 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
avgs[i] = make([]schema.Float, 0, len(jobs)) avgs[i] = make([]schema.Float, 0, len(jobs))
} }
nodehours := make([]schema.Float, 0, len(jobs)) timeweights := new(model.TimeWeights)
timeweights.NodeHours = make([]schema.Float, 0, len(jobs))
timeweights.AccHours = make([]schema.Float, 0, len(jobs))
timeweights.CoreHours = make([]schema.Float, 0, len(jobs))
for _, job := range jobs { for _, job := range jobs {
if job.MonitoringStatus == schema.MonitoringStatusDisabled || job.MonitoringStatus == schema.MonitoringStatusArchivingFailed { if job.MonitoringStatus == schema.MonitoringStatusDisabled || job.MonitoringStatus == schema.MonitoringStatusArchivingFailed {
continue continue
@ -117,7 +122,18 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
return nil, err return nil, err
} }
nodehours = append(nodehours, schema.Float(float64(job.Duration)/60.0*float64(job.NumNodes))) // #166 collect arrays: Null values or no null values?
timeweights.NodeHours = append(timeweights.NodeHours, schema.Float(float64(job.Duration)/60.0*float64(job.NumNodes)))
if job.NumAcc > 0 {
timeweights.AccHours = append(timeweights.AccHours, schema.Float(float64(job.Duration)/60.0*float64(job.NumAcc)))
} else {
timeweights.AccHours = append(timeweights.AccHours, schema.Float(1.0))
}
if job.NumHWThreads > 0 {
timeweights.CoreHours = append(timeweights.CoreHours, schema.Float(float64(job.Duration)/60.0*float64(job.NumHWThreads))) // SQLite HWThreads == Cores; numCoresForJob(job)
} else {
timeweights.CoreHours = append(timeweights.CoreHours, schema.Float(1.0))
}
} }
res := make([]*model.MetricFootprints, len(avgs)) res := make([]*model.MetricFootprints, len(avgs))
@ -129,11 +145,34 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
} }
return &model.Footprints{ return &model.Footprints{
Nodehours: nodehours, TimeWeights: timeweights,
Metrics: res, Metrics: res,
}, nil }, nil
} }
// func numCoresForJob(job *schema.Job) (numCores int) {
// subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster)
// if scerr != nil {
// return 1
// }
// totalJobCores := 0
// topology := subcluster.Topology
// for _, host := range job.Resources {
// hwthreads := host.HWThreads
// if hwthreads == nil {
// hwthreads = topology.Node
// }
// hostCores, _ := topology.GetCoresFromHWThreads(hwthreads)
// totalJobCores += len(hostCores)
// }
// return totalJobCores
// }
func requireField(ctx context.Context, name string) bool { func requireField(ctx context.Context, name string) bool {
fields := graphql.CollectAllFields(ctx) fields := graphql.CollectAllFields(ctx)

View File

@ -506,7 +506,7 @@ func (ccms *CCMetricStore) LoadStats(
metrics []string, metrics []string,
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) { ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}) queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}) // #166 Add scopes here for analysis view accelerator normalization?
if err != nil { if err != nil {
log.Warn("Error while building query") log.Warn("Error while building query")
return nil, err return nil, err

View File

@ -182,7 +182,7 @@ func LoadAverages(
ctx context.Context) error { ctx context.Context) error {
if job.State != schema.JobStateRunning && useArchive { if job.State != schema.JobStateRunning && useArchive {
return archive.LoadAveragesFromArchive(job, metrics, data) return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
} }
repo, ok := metricDataRepos[job.Cluster] repo, ok := metricDataRepos[job.Cluster]
@ -190,7 +190,7 @@ func LoadAverages(
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster) return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
} }
stats, err := repo.LoadStats(job, metrics, ctx) stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalization?
if err != nil { if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project) log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err return err

View File

@ -455,69 +455,6 @@ func (r *JobRepository) DeleteJobById(id int64) error {
return err return err
} }
// TODO: Use node hours instead: SELECT job.user, sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN CAST(strftime('%s', 'now') AS INTEGER) - job.start_time ELSE job.duration END)) as x FROM job GROUP BY user ORDER BY x DESC;
// CountGroupedJobs counts jobs matching the filters, grouped by the given
// aggregate (user/project/cluster) and ordered by count descending.
// If weight is set, the counted quantity becomes the node count
// (NODE_COUNT) or node hours (NODE_HOURS) instead of the plain job count;
// an unknown weight is logged and falls back to count(*). limit caps the
// number of returned groups. Returns a map from group name to count.
func (r *JobRepository) CountGroupedJobs(
	ctx context.Context,
	aggreg model.Aggregate,
	filters []*model.JobFilter,
	weight *model.Weights,
	limit *int) (map[string]int, error) {

	start := time.Now()
	if !aggreg.IsValid() {
		return nil, errors.New("invalid aggregate")
	}

	runner := (sq.BaseRunner)(r.stmtCache)
	count := "count(*) as count"
	if weight != nil {
		switch *weight {
		case model.WeightsNodeCount:
			count = "sum(job.num_nodes) as count"
		case model.WeightsNodeHours:
			// Running jobs have no final duration yet; derive it from now - start_time.
			now := time.Now().Unix()
			count = fmt.Sprintf(`sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) as count`, now)
			runner = r.DB
		default:
			log.Debugf("CountGroupedJobs() Weight %v unknown.", *weight)
		}
	}

	q, qerr := SecurityCheck(ctx, sq.Select("job."+string(aggreg), count).From("job").GroupBy("job."+string(aggreg)).OrderBy("count DESC"))
	if qerr != nil {
		return nil, qerr
	}

	for _, f := range filters {
		q = BuildWhereClause(f, q)
	}
	if limit != nil {
		q = q.Limit(uint64(*limit))
	}

	counts := map[string]int{}
	rows, err := q.RunWith(runner).Query()
	if err != nil {
		log.Error("Error while running query")
		return nil, err
	}
	// FIX: rows were never closed, leaking the underlying statement/connection.
	defer rows.Close()

	for rows.Next() {
		var group string
		var count int
		if err := rows.Scan(&group, &count); err != nil {
			log.Warn("Error while scanning rows")
			return nil, err
		}

		counts[group] = count
	}
	// FIX: surface iteration errors that rows.Next() terminates on silently.
	if err := rows.Err(); err != nil {
		log.Warn("Error while iterating rows")
		return nil, err
	}

	log.Debugf("Timer CountGroupedJobs %s", time.Since(start))
	return counts, nil
}
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) { func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
stmt := sq.Update("job"). stmt := sq.Update("job").
Set("monitoring_status", monitoringStatus). Set("monitoring_status", monitoringStatus).

View File

@ -18,13 +18,17 @@ import (
sq "github.com/Masterminds/squirrel" sq "github.com/Masterminds/squirrel"
) )
// SecurityCheck-less, private: Returns a list of jobs matching the provided filters. page and order are optional- func (r *JobRepository) QueryJobs(
func (r *JobRepository) queryJobs( ctx context.Context,
query sq.SelectBuilder,
filters []*model.JobFilter, filters []*model.JobFilter,
page *model.PageRequest, page *model.PageRequest,
order *model.OrderByInput) ([]*schema.Job, error) { order *model.OrderByInput) ([]*schema.Job, error) {
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
if qerr != nil {
return nil, qerr
}
if order != nil { if order != nil {
field := toSnakeCase(order.Field) field := toSnakeCase(order.Field)
@ -67,34 +71,15 @@ func (r *JobRepository) queryJobs(
return jobs, nil return jobs, nil
} }
// testFunction for queryJobs func (r *JobRepository) CountJobs(
func (r *JobRepository) testQueryJobs(
filters []*model.JobFilter,
page *model.PageRequest,
order *model.OrderByInput) ([]*schema.Job, error) {
return r.queryJobs(sq.Select(jobColumns...).From("job"), filters, page, order)
}
// Public function with added securityCheck, calls private queryJobs function above
func (r *JobRepository) QueryJobs(
ctx context.Context, ctx context.Context,
filters []*model.JobFilter,
page *model.PageRequest,
order *model.OrderByInput) ([]*schema.Job, error) {
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
if qerr != nil {
return nil, qerr
}
return r.queryJobs(query, filters, page, order)
}
// SecurityCheck-less, private: Returns the number of jobs matching the filters
func (r *JobRepository) countJobs(query sq.SelectBuilder,
filters []*model.JobFilter) (int, error) { filters []*model.JobFilter) (int, error) {
query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
if qerr != nil {
return 0, qerr
}
for _, f := range filters { for _, f := range filters {
query = BuildWhereClause(f, query) query = BuildWhereClause(f, query)
} }
@ -107,27 +92,6 @@ func (r *JobRepository) countJobs(query sq.SelectBuilder,
return count, nil return count, nil
} }
// testFunction for countJobs
func (r *JobRepository) testCountJobs(
filters []*model.JobFilter) (int, error) {
return r.countJobs(sq.Select("count(*)").From("job"), filters)
}
// Public function with added securityCheck, calls private countJobs function above
func (r *JobRepository) CountJobs(
ctx context.Context,
filters []*model.JobFilter) (int, error) {
query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
if qerr != nil {
return 0, qerr
}
return r.countJobs(query, filters)
}
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) { func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
user := GetUserFromContext(ctx) user := GetUserFromContext(ctx)
if user == nil { if user == nil {

View File

@ -5,10 +5,12 @@
package repository package repository
import ( import (
"context"
"testing" "testing"
"github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
_ "github.com/mattn/go-sqlite3" _ "github.com/mattn/go-sqlite3"
) )
@ -94,7 +96,7 @@ func BenchmarkDB_CountJobs(b *testing.B) {
b.RunParallel(func(pb *testing.PB) { b.RunParallel(func(pb *testing.PB) {
for pb.Next() { for pb.Next() {
_, err := db.testCountJobs([]*model.JobFilter{filter}) _, err := db.CountJobs(getContext(b), []*model.JobFilter{filter})
noErr(b, err) noErr(b, err)
} }
}) })
@ -118,20 +120,37 @@ func BenchmarkDB_QueryJobs(b *testing.B) {
b.RunParallel(func(pb *testing.PB) { b.RunParallel(func(pb *testing.PB) {
for pb.Next() { for pb.Next() {
_, err := db.testQueryJobs([]*model.JobFilter{filter}, page, order) _, err := db.QueryJobs(getContext(b), []*model.JobFilter{filter}, page, order)
noErr(b, err) noErr(b, err)
} }
}) })
}) })
} }
// getContext returns a context carrying an admin "demo" user, as required
// by the repository's SecurityCheck in the benchmarks above.
func getContext(tb testing.TB) context.Context {
	tb.Helper()

	user := &schema.User{
		Username:   "demo",
		Name:       "The man",
		Roles:      []string{schema.GetRoleString(schema.RoleAdmin)},
		Projects:   make([]string, 0),
		AuthSource: schema.AuthViaLDAP,
	}
	return context.WithValue(context.Background(), ContextUserKey, user)
}
func setup(tb testing.TB) *JobRepository { func setup(tb testing.TB) *JobRepository {
tb.Helper() tb.Helper()
log.Init("warn", true) log.Init("warn", true)
dbfile := "testdata/job.db" dbfile := "testdata/job.db"
err := MigrateDB("sqlite3", dbfile) err := MigrateDB("sqlite3", dbfile)
noErr(tb, err) noErr(tb, err)
Connect("sqlite3", dbfile) Connect("sqlite3", dbfile)
return GetJobRepository() return GetJobRepository()
} }

View File

@ -23,6 +23,17 @@ var groupBy2column = map[model.Aggregate]string{
model.AggregateCluster: "job.cluster", model.AggregateCluster: "job.cluster",
} }
// sortBy2column maps a GraphQL SortByAggregate enum value to the column
// alias produced by buildStatsQuery, so grouped statistics can be ordered
// by the requested aggregate.
var sortBy2column = map[model.SortByAggregate]string{
	model.SortByAggregateTotaljobs:      "totalJobs",
	model.SortByAggregateTotalwalltime:  "totalWalltime",
	model.SortByAggregateTotalnodes:     "totalNodes",
	model.SortByAggregateTotalnodehours: "totalNodeHours",
	model.SortByAggregateTotalcores:     "totalCores",
	model.SortByAggregateTotalcorehours: "totalCoreHours",
	model.SortByAggregateTotalaccs:      "totalAccs",
	model.SortByAggregateTotalacchours:  "totalAccHours",
}
func (r *JobRepository) buildCountQuery( func (r *JobRepository) buildCountQuery(
filter []*model.JobFilter, filter []*model.JobFilter,
kind string, kind string,
@ -60,19 +71,26 @@ func (r *JobRepository) buildStatsQuery(
castType := r.getCastType() castType := r.getCastType()
if col != "" { if col != "" {
// Scan columns: id, totalJobs, totalWalltime, totalNodeHours, totalCoreHours, totalAccHours // Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id)", query = sq.Select(col, "COUNT(job.id) as totalJobs",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType), fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s) as totalWalltime", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes) / 3600) as %s)", castType), fmt.Sprintf("CAST(SUM(job.num_nodes) as %s) as totalNodes", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_hwthreads) / 3600) as %s)", castType), fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes) / 3600) as %s) as totalNodeHours", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_acc) / 3600) as %s)", castType), fmt.Sprintf("CAST(SUM(job.num_hwthreads) as %s) as totalCores", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_hwthreads) / 3600) as %s) as totalCoreHours", castType),
fmt.Sprintf("CAST(SUM(job.num_acc) as %s) as totalAccs", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_acc) / 3600) as %s) as totalAccHours", castType),
).From("job").GroupBy(col) ).From("job").GroupBy(col)
} else { } else {
// Scan columns: totalJobs, totalWalltime, totalNodeHours, totalCoreHours, totalAccHours // Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select("COUNT(job.id)", query = sq.Select("COUNT(job.id)",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType), fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
fmt.Sprintf("CAST(SUM(job.num_nodes) as %s)", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes) / 3600) as %s)", castType), fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes) / 3600) as %s)", castType),
fmt.Sprintf("CAST(SUM(job.num_hwthreads) as %s)", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_hwthreads) / 3600) as %s)", castType), fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_hwthreads) / 3600) as %s)", castType),
fmt.Sprintf("CAST(SUM(job.num_acc) as %s)", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_acc) / 3600) as %s)", castType), fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_acc) / 3600) as %s)", castType),
).From("job") ).From("job")
} }
@ -112,16 +130,28 @@ func (r *JobRepository) getCastType() string {
func (r *JobRepository) JobsStatsGrouped( func (r *JobRepository) JobsStatsGrouped(
ctx context.Context, ctx context.Context,
filter []*model.JobFilter, filter []*model.JobFilter,
page *model.PageRequest,
sortBy *model.SortByAggregate,
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) { groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
start := time.Now() start := time.Now()
col := groupBy2column[*groupBy] col := groupBy2column[*groupBy]
query := r.buildStatsQuery(filter, col) query := r.buildStatsQuery(filter, col)
query, err := SecurityCheck(ctx, query) query, err := SecurityCheck(ctx, query)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if sortBy != nil {
sortBy := sortBy2column[*sortBy]
query = query.OrderBy(fmt.Sprintf("%s DESC", sortBy))
}
if page != nil && page.ItemsPerPage != -1 {
limit := uint64(page.ItemsPerPage)
query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
}
rows, err := query.RunWith(r.DB).Query() rows, err := query.RunWith(r.DB).Query()
if err != nil { if err != nil {
log.Warn("Error while querying DB for job statistics") log.Warn("Error while querying DB for job statistics")
@ -132,15 +162,36 @@ func (r *JobRepository) JobsStatsGrouped(
for rows.Next() { for rows.Next() {
var id sql.NullString var id sql.NullString
var jobs, walltime, nodeHours, coreHours, accHours sql.NullInt64 var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &nodeHours, &coreHours, &accHours); err != nil { if err := rows.Scan(&id, &jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
log.Warn("Error while scanning rows") log.Warn("Error while scanning rows")
return nil, err return nil, err
} }
if id.Valid { if id.Valid {
var totalCoreHours, totalAccHours int var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
if jobs.Valid {
totalJobs = int(jobs.Int64)
}
if walltime.Valid {
totalWalltime = int(walltime.Int64)
}
if nodes.Valid {
totalNodes = int(nodes.Int64)
}
if cores.Valid {
totalCores = int(cores.Int64)
}
if accs.Valid {
totalAccs = int(accs.Int64)
}
if nodeHours.Valid {
totalNodeHours = int(nodeHours.Int64)
}
if coreHours.Valid { if coreHours.Valid {
totalCoreHours = int(coreHours.Int64) totalCoreHours = int(coreHours.Int64)
} }
@ -154,9 +205,13 @@ func (r *JobRepository) JobsStatsGrouped(
&model.JobsStatistics{ &model.JobsStatistics{
ID: id.String, ID: id.String,
Name: name, Name: name,
TotalJobs: int(jobs.Int64), TotalJobs: totalJobs,
TotalWalltime: int(walltime.Int64), TotalWalltime: totalWalltime,
TotalNodes: totalNodes,
TotalNodeHours: totalNodeHours,
TotalCores: totalCores,
TotalCoreHours: totalCoreHours, TotalCoreHours: totalCoreHours,
TotalAccs: totalAccs,
TotalAccHours: totalAccHours}) TotalAccHours: totalAccHours})
} else { } else {
stats = append(stats, stats = append(stats,
@ -164,7 +219,11 @@ func (r *JobRepository) JobsStatsGrouped(
ID: id.String, ID: id.String,
TotalJobs: int(jobs.Int64), TotalJobs: int(jobs.Int64),
TotalWalltime: int(walltime.Int64), TotalWalltime: int(walltime.Int64),
TotalNodes: totalNodes,
TotalNodeHours: totalNodeHours,
TotalCores: totalCores,
TotalCoreHours: totalCoreHours, TotalCoreHours: totalCoreHours,
TotalAccs: totalAccs,
TotalAccHours: totalAccHours}) TotalAccHours: totalAccHours})
} }
} }
@ -188,15 +247,18 @@ func (r *JobRepository) JobsStats(
row := query.RunWith(r.DB).QueryRow() row := query.RunWith(r.DB).QueryRow()
stats := make([]*model.JobsStatistics, 0, 1) stats := make([]*model.JobsStatistics, 0, 1)
var jobs, walltime, nodeHours, coreHours, accHours sql.NullInt64 var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := row.Scan(&jobs, &walltime, &nodeHours, &coreHours, &accHours); err != nil { if err := row.Scan(&jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
log.Warn("Error while scanning rows") log.Warn("Error while scanning rows")
return nil, err return nil, err
} }
if jobs.Valid { if jobs.Valid {
var totalCoreHours, totalAccHours int var totalNodeHours, totalCoreHours, totalAccHours int
if nodeHours.Valid {
totalNodeHours = int(nodeHours.Int64)
}
if coreHours.Valid { if coreHours.Valid {
totalCoreHours = int(coreHours.Int64) totalCoreHours = int(coreHours.Int64)
} }
@ -207,6 +269,7 @@ func (r *JobRepository) JobsStats(
&model.JobsStatistics{ &model.JobsStatistics{
TotalJobs: int(jobs.Int64), TotalJobs: int(jobs.Int64),
TotalWalltime: int(walltime.Int64), TotalWalltime: int(walltime.Int64),
TotalNodeHours: totalNodeHours,
TotalCoreHours: totalCoreHours, TotalCoreHours: totalCoreHours,
TotalAccHours: totalAccHours}) TotalAccHours: totalAccHours})
} }
@ -321,7 +384,7 @@ func (r *JobRepository) AddJobCount(
return nil, err return nil, err
} }
counts := make(map[string]int) var count int
for rows.Next() { for rows.Next() {
var cnt sql.NullInt64 var cnt sql.NullInt64
@ -329,20 +392,22 @@ func (r *JobRepository) AddJobCount(
log.Warn("Error while scanning rows") log.Warn("Error while scanning rows")
return nil, err return nil, err
} }
count = int(cnt.Int64)
} }
switch kind { switch kind {
case "running": case "running":
for _, s := range stats { for _, s := range stats {
s.RunningJobs = counts[s.ID] s.RunningJobs = count
} }
case "short": case "short":
for _, s := range stats { for _, s := range stats {
s.ShortJobs = counts[s.ID] s.ShortJobs = count
} }
} }
log.Debugf("Timer JobJobCount %s", time.Since(start)) log.Debugf("Timer AddJobCount %s", time.Since(start))
return stats, nil return stats, nil
} }
@ -367,6 +432,18 @@ func (r *JobRepository) AddHistograms(
return nil, err return nil, err
} }
stat.HistNumCores, err = r.jobsStatisticsHistogram(ctx, "job.num_hwthreads as value", filter)
if err != nil {
log.Warn("Error while loading job statistics histogram: num hwthreads")
return nil, err
}
stat.HistNumAccs, err = r.jobsStatisticsHistogram(ctx, "job.num_acc as value", filter)
if err != nil {
log.Warn("Error while loading job statistics histogram: num acc")
return nil, err
}
log.Debugf("Timer AddHistograms %s", time.Since(start)) log.Debugf("Timer AddHistograms %s", time.Since(start))
return stat, nil return stat, nil
} }

View File

@ -7,6 +7,8 @@ package repository
import ( import (
"fmt" "fmt"
"testing" "testing"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
) )
func TestBuildJobStatsQuery(t *testing.T) { func TestBuildJobStatsQuery(t *testing.T) {
@ -19,3 +21,15 @@ func TestBuildJobStatsQuery(t *testing.T) {
fmt.Printf("SQL: %s\n", sql) fmt.Printf("SQL: %s\n", sql)
} }
func TestJobStats(t *testing.T) {
r := setup(t)
filter := &model.JobFilter{}
stats, err := r.JobsStats(getContext(t), []*model.JobFilter{filter})
noErr(t, err)
if stats[0].TotalJobs != 6 {
t.Fatalf("Want 98, Got %d", stats[0].TotalJobs)
}
}

View File

@ -1,7 +1,7 @@
<script> <script>
import { init, convert2uplot } from './utils.js' import { init, convert2uplot } from './utils.js'
import { getContext, onMount } from 'svelte' import { getContext, onMount } from 'svelte'
import { queryStore, gql, getContextClient } from '@urql/svelte' import { queryStore, gql, getContextClient, mutationStore } from '@urql/svelte'
import { Row, Col, Spinner, Card, Table, Icon } from 'sveltestrap' import { Row, Col, Spinner, Card, Table, Icon } from 'sveltestrap'
import Filters from './filters/Filters.svelte' import Filters from './filters/Filters.svelte'
import PlotSelection from './PlotSelection.svelte' import PlotSelection from './PlotSelection.svelte'
@ -42,6 +42,20 @@
$: metrics = [...new Set([...metricsInHistograms, ...metricsInScatterplots.flat()])] $: metrics = [...new Set([...metricsInHistograms, ...metricsInScatterplots.flat()])]
const sortOptions = [
{key: 'totalWalltime', label: 'Walltime'},
{key: 'totalNodeHours', label: 'Node Hours'},
{key: 'totalCoreHours', label: 'Core Hours'},
{key: 'totalAccHours', label: 'Accelerator Hours'}
]
const groupOptions = [
{key: 'user', label: 'User Name'},
{key: 'project', label: 'Project ID'}
]
let sortSelection = sortOptions.find((option) => option.key == ccconfig[`analysis_view_selectedTopCategory:${filterPresets.cluster}`]) || sortOptions.find((option) => option.key == ccconfig.analysis_view_selectedTopCategory)
let groupSelection = groupOptions.find((option) => option.key == ccconfig[`analysis_view_selectedTopEntity:${filterPresets.cluster}`]) || groupOptions.find((option) => option.key == ccconfig.analysis_view_selectedTopEntity)
getContext('on-init')(({ data }) => { getContext('on-init')(({ data }) => {
if (data != null) { if (data != null) {
cluster = data.clusters.find(c => c.name == filterPresets.cluster) cluster = data.clusters.find(c => c.name == filterPresets.cluster)
@ -62,23 +76,39 @@
totalJobs totalJobs
shortJobs shortJobs
totalWalltime totalWalltime
totalNodeHours
totalCoreHours totalCoreHours
totalAccHours
histDuration { count, value } histDuration { count, value }
histNumNodes { count, value } histNumCores { count, value }
} }
topUsers: jobsCount(filter: $jobFilters, groupBy: USER, weight: NODE_HOURS, limit: 5) { name, count }
} }
`, `,
variables: { jobFilters } variables: { jobFilters }
}) })
$: topQuery = queryStore({
client: client,
query: gql`
query($jobFilters: [JobFilter!]!, $paging: PageRequest!, $sortBy: SortByAggregate!, $groupBy: Aggregate!) {
topList: jobsStatistics(filter: $jobFilters, page: $paging, sortBy: $sortBy, groupBy: $groupBy) {
id
totalWalltime
totalNodeHours
totalCoreHours
totalAccHours
}
}
`,
variables: { jobFilters, paging: { itemsPerPage: 10, page: 1 }, sortBy: sortSelection.key.toUpperCase(), groupBy: groupSelection.key.toUpperCase() }
})
$: footprintsQuery = queryStore({ $: footprintsQuery = queryStore({
client: client, client: client,
query: gql` query: gql`
query($jobFilters: [JobFilter!]!, $metrics: [String!]!) { query($jobFilters: [JobFilter!]!, $metrics: [String!]!) {
footprints: jobsFootprints(filter: $jobFilters, metrics: $metrics) { footprints: jobsFootprints(filter: $jobFilters, metrics: $metrics) {
nodehours, timeWeights { nodeHours, accHours, coreHours },
metrics { metric, data } metrics { metric, data }
} }
}`, }`,
@ -97,6 +127,53 @@
variables: { jobFilters, rows: 50, cols: 50, minX: 0.01, minY: 1., maxX: 1000., maxY } variables: { jobFilters, rows: 50, cols: 50, minX: 0.01, minY: 1., maxX: 1000., maxY }
}) })
const updateConfigurationMutation = ({ name, value }) => {
return mutationStore({
client: client,
query: gql`
mutation ($name: String!, $value: String!) {
updateConfiguration(name: $name, value: $value)
}
`,
variables: { name, value }
});
}
function updateEntityConfiguration(select) {
if (ccconfig[`analysis_view_selectedTopEntity:${filterPresets.cluster}`] != select) {
updateConfigurationMutation({ name: `analysis_view_selectedTopEntity:${filterPresets.cluster}`, value: JSON.stringify(select) })
.subscribe(res => {
if (res.fetching === false && !res.error) {
// console.log(`analysis_view_selectedTopEntity:${filterPresets.cluster}` + ' -> Updated!')
} else if (res.fetching === false && res.error) {
throw res.error
}
})
} else {
// console.log('No Mutation Required: Entity')
}
};
function updateCategoryConfiguration(select) {
if (ccconfig[`analysis_view_selectedTopCategory:${filterPresets.cluster}`] != select) {
updateConfigurationMutation({ name: `analysis_view_selectedTopCategory:${filterPresets.cluster}`, value: JSON.stringify(select) })
.subscribe(res => {
if (res.fetching === false && !res.error) {
// console.log(`analysis_view_selectedTopCategory:${filterPresets.cluster}` + ' -> Updated!')
} else if (res.fetching === false && res.error) {
throw res.error
}
})
} else {
// console.log('No Mutation Required: Category')
}
};
$: updateEntityConfiguration(groupSelection.key)
$: updateCategoryConfiguration(sortSelection.key)
onMount(() => filterComponent.update()) onMount(() => filterComponent.update())
</script> </script>
@ -151,36 +228,82 @@
<th scope="col">Total Walltime</th> <th scope="col">Total Walltime</th>
<td>{$statsQuery.data.stats[0].totalWalltime}</td> <td>{$statsQuery.data.stats[0].totalWalltime}</td>
</tr> </tr>
<tr>
<th scope="col">Total Node Hours</th>
<td>{$statsQuery.data.stats[0].totalNodeHours}</td>
</tr>
<tr> <tr>
<th scope="col">Total Core Hours</th> <th scope="col">Total Core Hours</th>
<td>{$statsQuery.data.stats[0].totalCoreHours}</td> <td>{$statsQuery.data.stats[0].totalCoreHours}</td>
</tr> </tr>
<tr>
<th scope="col">Total Accelerator Hours</th>
<td>{$statsQuery.data.stats[0].totalAccHours}</td>
</tr>
</Table> </Table>
</Col> </Col>
<Col> <Col>
<div bind:clientWidth={colWidth1}> <div bind:clientWidth={colWidth1}>
<h5>Top Users</h5> <h5>Top
{#key $statsQuery.data.topUsers} <select class="p-0" bind:value={groupSelection}>
<Pie {#each groupOptions as option}
size={colWidth1} <option value={option}>
sliceLabel='Hours' {option.key.charAt(0).toUpperCase() + option.key.slice(1)}s
quantities={$statsQuery.data.topUsers.sort((a, b) => b.count - a.count).map((tu) => tu.count)} </option>
entities={$statsQuery.data.topUsers.sort((a, b) => b.count - a.count).map((tu) => tu.name)} {/each}
/> </select>
</h5>
{#key $topQuery.data}
{#if $topQuery.fetching}
<Spinner/>
{:else if $topQuery.error}
<Card body color="danger">{$topQuery.error.message}</Card>
{:else}
<Pie
size={colWidth1}
sliceLabel={sortSelection.label}
quantities={$topQuery.data.topList.map((t) => t[sortSelection.key])}
entities={$topQuery.data.topList.map((t) => t.id)}
/>
{/if}
{/key} {/key}
</div> </div>
</Col> </Col>
<Col> <Col>
<Table> {#key $topQuery.data}
<tr class="mb-2"><th>Legend</th><th>User Name</th><th>Node Hours</th></tr> {#if $topQuery.fetching}
{#each $statsQuery.data.topUsers.sort((a, b) => b.count - a.count) as { name, count }, i} <Spinner/>
<tr> {:else if $topQuery.error}
<td><Icon name="circle-fill" style="color: {colors[i]};"/></td> <Card body color="danger">{$topQuery.error.message}</Card>
<th scope="col"><a href="/monitoring/user/{name}?cluster={cluster.name}">{name}</a></th> {:else}
<td>{count}</td> <Table>
</tr> <tr class="mb-2">
{/each} <th>Legend</th>
</Table> <th>{groupSelection.label}</th>
<th>
<select class="p-0" bind:value={sortSelection}>
{#each sortOptions as option}
<option value={option}>
{option.label}
</option>
{/each}
</select>
</th>
</tr>
{#each $topQuery.data.topList as te, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};"/></td>
{#if groupSelection.key == 'User'}
<th scope="col"><a href="/monitoring/user/{te.id}?cluster={cluster.name}">{te.id}</a></th>
{:else}
<th scope="col"><a href="/monitoring/jobs/?cluster={cluster.name}&project={te.id}&projectMatch=eq">{te.id}</a></th>
{/if}
<td>{te[sortSelection.key]}</td>
</tr>
{/each}
</Table>
{/if}
{/key}
</Col> </Col>
</Row> </Row>
<Row cols={3} class="mb-2"> <Row cols={3} class="mb-2">
@ -217,13 +340,13 @@
</Col> </Col>
<Col> <Col>
<div bind:clientWidth={colWidth4}> <div bind:clientWidth={colWidth4}>
{#key $statsQuery.data.stats[0].histNumNodes} {#key $statsQuery.data.stats[0].histNumCores}
<Histogram <Histogram
width={colWidth4} height={300} width={colWidth4} height={300}
data={convert2uplot($statsQuery.data.stats[0].histNumNodes)} data={convert2uplot($statsQuery.data.stats[0].histNumCores)}
title="Number of Nodes Distribution" title="Number of Cores Distribution"
xlabel="Allocated Nodes" xlabel="Allocated Cores"
xunit="Nodes" xunit="Cores"
ylabel="Number of Jobs" ylabel="Number of Jobs"
yunit="Jobs"/> yunit="Jobs"/>
{/key} {/key}
@ -244,8 +367,9 @@
<Row> <Row>
<Col> <Col>
<Card body> <Card body>
These histograms show the distribution of the averages of all jobs matching the filters. Each job/average is weighted by its node hours. These histograms show the distribution of the averages of all jobs matching the filters. Each job/average is weighted by its node hours by default
Note that some metrics could be disabled for specific subclusters as per metriConfig and thus could affect shown average values. (Accelerator hours for native accelerator scope metrics, coreHours for native core scope metrics).
Note that some metrics could be disabled for specific subclusters as per metricConfig and thus could affect shown average values.
</Card> </Card>
<br/> <br/>
</Col> </Col>
@ -257,7 +381,8 @@
let:width let:width
renderFor="analysis" renderFor="analysis"
items={metricsInHistograms.map(metric => ({ metric, ...binsFromFootprint( items={metricsInHistograms.map(metric => ({ metric, ...binsFromFootprint(
$footprintsQuery.data.footprints.nodehours, $footprintsQuery.data.footprints.timeWeights,
metricConfig(cluster.name, metric)?.scope,
$footprintsQuery.data.footprints.metrics.find(f => f.metric == metric).data, numBins) }))} $footprintsQuery.data.footprints.metrics.find(f => f.metric == metric).data, numBins) }))}
itemsPerRow={ccconfig.plot_view_plotsPerRow}> itemsPerRow={ccconfig.plot_view_plotsPerRow}>
@ -265,11 +390,11 @@
data={convert2uplot(item.bins)} data={convert2uplot(item.bins)}
width={width} height={250} width={width} height={250}
title="Average Distribution of '{item.metric}'" title="Average Distribution of '{item.metric}'"
xlabel={`${item.metric} average [${(metricConfig(cluster.name, item.metric)?.unit?.prefix ? metricConfig(cluster.name, item.metric)?.unit?.prefix : '') + xlabel={`${item.metric} bin maximum [${(metricConfig(cluster.name, item.metric)?.unit?.prefix ? metricConfig(cluster.name, item.metric)?.unit?.prefix : '') +
(metricConfig(cluster.name, item.metric)?.unit?.base ? metricConfig(cluster.name, item.metric)?.unit?.base : '')}]`} (metricConfig(cluster.name, item.metric)?.unit?.base ? metricConfig(cluster.name, item.metric)?.unit?.base : '')}]`}
xunit={`${(metricConfig(cluster.name, item.metric)?.unit?.prefix ? metricConfig(cluster.name, item.metric)?.unit?.prefix : '') + xunit={`${(metricConfig(cluster.name, item.metric)?.unit?.prefix ? metricConfig(cluster.name, item.metric)?.unit?.prefix : '') +
(metricConfig(cluster.name, item.metric)?.unit?.base ? metricConfig(cluster.name, item.metric)?.unit?.base : '')}`} (metricConfig(cluster.name, item.metric)?.unit?.base ? metricConfig(cluster.name, item.metric)?.unit?.base : '')}`}
ylabel="Node Hours" ylabel="Normalized Hours"
yunit="Hours"/> yunit="Hours"/>
</PlotTable> </PlotTable>
</Col> </Col>
@ -279,7 +404,7 @@
<Col> <Col>
<Card body> <Card body>
Each circle represents one job. The size of a circle is proportional to its node hours. Darker circles mean multiple jobs have the same averages for the respective metrics. Each circle represents one job. The size of a circle is proportional to its node hours. Darker circles mean multiple jobs have the same averages for the respective metrics.
Note that some metrics could be disabled for specific subclusters as per metriConfig and thus could affect shown average values. Note that some metrics could be disabled for specific subclusters as per metricConfig and thus could affect shown average values.
</Card> </Card>
<br/> <br/>
</Col> </Col>
@ -301,7 +426,7 @@
(metricConfig(cluster.name, item.m1)?.unit?.base ? metricConfig(cluster.name, item.m1)?.unit?.base : '')}]`} (metricConfig(cluster.name, item.m1)?.unit?.base ? metricConfig(cluster.name, item.m1)?.unit?.base : '')}]`}
yLabel={`${item.m2} [${(metricConfig(cluster.name, item.m2)?.unit?.prefix ? metricConfig(cluster.name, item.m2)?.unit?.prefix : '') + yLabel={`${item.m2} [${(metricConfig(cluster.name, item.m2)?.unit?.prefix ? metricConfig(cluster.name, item.m2)?.unit?.prefix : '') +
(metricConfig(cluster.name, item.m2)?.unit?.base ? metricConfig(cluster.name, item.m2)?.unit?.base : '')}]`} (metricConfig(cluster.name, item.m2)?.unit?.base ? metricConfig(cluster.name, item.m2)?.unit?.base : '')}]`}
X={item.f1} Y={item.f2} S={$footprintsQuery.data.footprints.nodehours} /> X={item.f1} Y={item.f2} S={$footprintsQuery.data.footprints.timeWeights.nodeHours} />
</PlotTable> </PlotTable>
</Col> </Col>
</Row> </Row>

View File

@ -1,4 +1,5 @@
<script> <script>
import { getContext } from 'svelte'
import Refresher from './joblist/Refresher.svelte' import Refresher from './joblist/Refresher.svelte'
import Roofline, { transformPerNodeData } from './plots/Roofline.svelte' import Roofline, { transformPerNodeData } from './plots/Roofline.svelte'
import Pie, { colors } from './plots/Pie.svelte' import Pie, { colors } from './plots/Pie.svelte'
@ -6,14 +7,24 @@
import { Row, Col, Spinner, Card, CardHeader, CardTitle, CardBody, Table, Progress, Icon } from 'sveltestrap' import { Row, Col, Spinner, Card, CardHeader, CardTitle, CardBody, Table, Progress, Icon } from 'sveltestrap'
import { init, convert2uplot } from './utils.js' import { init, convert2uplot } from './utils.js'
import { scaleNumbers } from './units.js' import { scaleNumbers } from './units.js'
import { queryStore, gql, getContextClient } from '@urql/svelte' import { queryStore, gql, getContextClient, mutationStore } from '@urql/svelte'
const { query: initq } = init() const { query: initq } = init()
const ccconfig = getContext("cc-config")
export let cluster export let cluster
let plotWidths = [], colWidth1 = 0, colWidth2 let plotWidths = [], colWidth1 = 0, colWidth2
let from = new Date(Date.now() - 5 * 60 * 1000), to = new Date(Date.now()) let from = new Date(Date.now() - 5 * 60 * 1000), to = new Date(Date.now())
const topOptions = [
{key: 'totalJobs', label: 'Jobs'},
{key: 'totalNodes', label: 'Nodes'},
{key: 'totalCores', label: 'Cores'},
{key: 'totalAccs', label: 'Accelerators'},
]
let topProjectSelection = topOptions.find((option) => option.key == ccconfig[`status_view_selectedTopProjectCategory:${cluster}`]) || topOptions.find((option) => option.key == ccconfig.status_view_selectedTopProjectCategory)
let topUserSelection = topOptions.find((option) => option.key == ccconfig[`status_view_selectedTopUserCategory:${cluster}`]) || topOptions.find((option) => option.key == ccconfig.status_view_selectedTopUserCategory)
const client = getContextClient(); const client = getContextClient();
$: mainQuery = queryStore({ $: mainQuery = queryStore({
@ -36,11 +47,11 @@
stats: jobsStatistics(filter: $filter) { stats: jobsStatistics(filter: $filter) {
histDuration { count, value } histDuration { count, value }
histNumNodes { count, value } histNumNodes { count, value }
histNumCores { count, value }
histNumAccs { count, value }
} }
allocatedNodes(cluster: $cluster) { name, count } allocatedNodes(cluster: $cluster) { name, count }
topUsers: jobsCount(filter: $filter, groupBy: USER, weight: NODE_COUNT, limit: 10) { name, count }
topProjects: jobsCount(filter: $filter, groupBy: PROJECT, weight: NODE_COUNT, limit: 10) { name, count }
}`, }`,
variables: { variables: {
cluster: cluster, metrics: ['flops_any', 'mem_bw'], from: from.toISOString(), to: to.toISOString(), cluster: cluster, metrics: ['flops_any', 'mem_bw'], from: from.toISOString(), to: to.toISOString(),
@ -48,6 +59,39 @@
} }
}) })
const paging = { itemsPerPage: 10, page: 1 }; // Top 10
$: topUserQuery = queryStore({
client: client,
query: gql`
query($filter: [JobFilter!]!, $paging: PageRequest!, $sortBy: SortByAggregate!) {
topUser: jobsStatistics(filter: $filter, page: $paging, sortBy: $sortBy, groupBy: USER) {
id
totalJobs
totalNodes
totalCores
totalAccs
}
}
`,
variables: { filter: [{ state: ['running'] }, { cluster: { eq: cluster } }], paging, sortBy: topUserSelection.key.toUpperCase() }
})
$: topProjectQuery = queryStore({
client: client,
query: gql`
query($filter: [JobFilter!]!, $paging: PageRequest!, $sortBy: SortByAggregate!) {
topProjects: jobsStatistics(filter: $filter, page: $paging, sortBy: $sortBy, groupBy: PROJECT) {
id
totalJobs
totalNodes
totalCores
totalAccs
}
}
`,
variables: { filter: [{ state: ['running'] }, { cluster: { eq: cluster } }], paging, sortBy: topProjectSelection.key.toUpperCase() }
})
const sumUp = (data, subcluster, metric) => data.reduce((sum, node) => node.subCluster == subcluster const sumUp = (data, subcluster, metric) => data.reduce((sum, node) => node.subCluster == subcluster
? sum + (node.metrics.find(m => m.name == metric)?.metric.series.reduce((sum, series) => sum + series.data[series.data.length - 1], 0) || 0) ? sum + (node.metrics.find(m => m.name == metric)?.metric.series.reduce((sum, series) => sum + series.data[series.data.length - 1], 0) || 0)
: sum, 0) : sum, 0)
@ -66,6 +110,51 @@
} }
} }
const updateConfigurationMutation = ({ name, value }) => {
return mutationStore({
client: client,
query: gql`
mutation ($name: String!, $value: String!) {
updateConfiguration(name: $name, value: $value)
}
`,
variables: { name, value }
});
}
function updateTopUserConfiguration(select) {
if (ccconfig[`status_view_selectedTopUserCategory:${cluster}`] != select) {
updateConfigurationMutation({ name: `status_view_selectedTopUserCategory:${cluster}`, value: JSON.stringify(select) })
.subscribe(res => {
if (res.fetching === false && !res.error) {
// console.log(`status_view_selectedTopUserCategory:${cluster}` + ' -> Updated!')
} else if (res.fetching === false && res.error) {
throw res.error
}
})
} else {
// console.log('No Mutation Required: Top User')
}
}
function updateTopProjectConfiguration(select) {
if (ccconfig[`status_view_selectedTopProjectCategory:${cluster}`] != select) {
updateConfigurationMutation({ name: `status_view_selectedTopProjectCategory:${cluster}`, value: JSON.stringify(select) })
.subscribe(res => {
if (res.fetching === false && !res.error) {
// console.log(`status_view_selectedTopProjectCategory:${cluster}` + ' -> Updated!')
} else if (res.fetching === false && res.error) {
throw res.error
}
})
} else {
// console.log('No Mutation Required: Top Project')
}
};
$: updateTopUserConfiguration(topUserSelection.key)
$: updateTopProjectConfiguration(topProjectSelection.key)
</script> </script>
<!-- Loading indicator & Refresh --> <!-- Loading indicator & Refresh -->
@ -160,52 +249,103 @@
<Row cols={4}> <Row cols={4}>
<Col class="p-2"> <Col class="p-2">
<div bind:clientWidth={colWidth1}> <div bind:clientWidth={colWidth1}>
<h4 class="text-center">Top Users</h4> <h4 class="text-center">Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)}</h4>
{#key $mainQuery.data} {#key $topUserQuery.data}
<Pie {#if $topUserQuery.fetching}
size={colWidth1} <Spinner/>
sliceLabel='Jobs' {:else if $topUserQuery.error}
quantities={$mainQuery.data.topUsers.sort((a, b) => b.count - a.count).map((tu) => tu.count)} <Card body color="danger">{$topUserQuery.error.message}</Card>
entities={$mainQuery.data.topUsers.sort((a, b) => b.count - a.count).map((tu) => tu.name)} {:else}
<Pie
/> size={colWidth1}
sliceLabel={topUserSelection.label}
quantities={$topUserQuery.data.topUser.map((tu) => tu[topUserSelection.key])}
entities={$topUserQuery.data.topUser.map((tu) => tu.id)}
/>
{/if}
{/key} {/key}
</div> </div>
</Col> </Col>
<Col class="px-4 py-2"> <Col class="px-4 py-2">
<Table> {#key $topUserQuery.data}
<tr class="mb-2"><th>Legend</th><th>User Name</th><th>Number of Nodes</th></tr> {#if $topUserQuery.fetching}
{#each $mainQuery.data.topUsers.sort((a, b) => b.count - a.count) as { name, count }, i} <Spinner/>
<tr> {:else if $topUserQuery.error}
<td><Icon name="circle-fill" style="color: {colors[i]};"/></td> <Card body color="danger">{$topUserQuery.error.message}</Card>
<th scope="col"><a href="/monitoring/user/{name}?cluster={cluster}&state=running">{name}</a></th> {:else}
<td>{count}</td> <Table>
</tr> <tr class="mb-2">
{/each} <th>Legend</th>
</Table> <th>User Name</th>
<th>Number of
<select class="p-0" bind:value={topUserSelection}>
{#each topOptions as option}
<option value={option}>
{option.label}
</option>
{/each}
</select>
</th>
</tr>
{#each $topUserQuery.data.topUser as tu, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};"/></td>
<th scope="col"><a href="/monitoring/user/{tu.id}?cluster={cluster}&state=running">{tu.id}</a></th>
<td>{tu[topUserSelection.key]}</td>
</tr>
{/each}
</Table>
{/if}
{/key}
</Col> </Col>
<Col class="p-2"> <Col class="p-2">
<h4 class="text-center">Top Projects</h4> <h4 class="text-center">Top Projects on {cluster.charAt(0).toUpperCase() + cluster.slice(1)}</h4>
{#key $mainQuery.data} {#key $topProjectQuery.data}
<Pie {#if $topProjectQuery.fetching}
size={colWidth1} <Spinner/>
sliceLabel='Jobs' {:else if $topProjectQuery.error}
quantities={$mainQuery.data.topProjects.sort((a, b) => b.count - a.count).map((tp) => tp.count)} <Card body color="danger">{$topProjectQuery.error.message}</Card>
entities={$mainQuery.data.topProjects.sort((a, b) => b.count - a.count).map((tp) => tp.name)} {:else}
/> <Pie
size={colWidth1}
sliceLabel={topProjectSelection.label}
quantities={$topProjectQuery.data.topProjects.map((tp) => tp[topProjectSelection.key])}
entities={$topProjectQuery.data.topProjects.map((tp) => tp.id)}
/>
{/if}
{/key} {/key}
</Col> </Col>
<Col class="px-4 py-2"> <Col class="px-4 py-2">
<Table> {#key $topProjectQuery.data}
<tr class="mb-2"><th>Legend</th><th>Project Code</th><th>Number of Nodes</th></tr> {#if $topProjectQuery.fetching}
{#each $mainQuery.data.topProjects.sort((a, b) => b.count - a.count) as { name, count }, i} <Spinner/>
<tr> {:else if $topProjectQuery.error}
<td><Icon name="circle-fill" style="color: {colors[i]};"/></td> <Card body color="danger">{$topProjectQuery.error.message}</Card>
<th scope="col"><a href="/monitoring/jobs/?cluster={cluster}&state=running&project={name}&projectMatch=eq">{name}</a></th> {:else}
<td>{count}</td> <Table>
</tr> <tr class="mb-2">
{/each} <th>Legend</th>
</Table> <th>Project Code</th>
<th>Number of
<select class="p-0" bind:value={topProjectSelection}>
{#each topOptions as option}
<option value={option}>
{option.label}
</option>
{/each}
</select>
</th>
</tr>
{#each $topProjectQuery.data.topProjects as tp, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};"/></td>
<th scope="col"><a href="/monitoring/jobs/?cluster={cluster}&state=running&project={tp.id}&projectMatch=eq">{tp.id}</a></th>
<td>{tp[topProjectSelection.key]}</td>
</tr>
{/each}
</Table>
{/if}
{/key}
</Col> </Col>
</Row> </Row>
<hr class="my-2"/> <hr class="my-2"/>
@ -237,4 +377,32 @@
{/key} {/key}
</Col> </Col>
</Row> </Row>
<Row cols={2}>
<Col class="p-2">
<div bind:clientWidth={colWidth2}>
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histNumCores)}
width={colWidth2 - 25}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Cores"
ylabel="Number of Jobs"
yunit="Jobs"/>
{/key}
</div>
</Col>
<Col class="p-2">
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histNumAccs)}
width={colWidth2 - 25}
title="Number of Accelerators Distribution"
xlabel="Allocated Accs"
xunit="Accs"
ylabel="Number of Jobs"
yunit="Jobs"/>
{/key}
</Col>
</Row>
{/if} {/if}

View File

@ -43,14 +43,14 @@
export let entities export let entities
export let displayLegend = false export let displayLegend = false
const data = { $: data = {
labels: entities, labels: entities,
datasets: [ datasets: [
{ {
label: sliceLabel, label: sliceLabel,
data: quantities, data: quantities,
fill: 1, fill: 1,
backgroundColor: colors.slice(0, quantities.length), backgroundColor: colors.slice(0, quantities.length)
} }
] ]
} }

View File

@ -325,7 +325,7 @@ export function convert2uplot(canvasData) {
return uplotData return uplotData
} }
export function binsFromFootprint(weights, values, numBins) { export function binsFromFootprint(weights, scope, values, numBins) {
let min = 0, max = 0 let min = 0, max = 0
if (values.length != 0) { if (values.length != 0) {
for (let x of values) { for (let x of values) {
@ -338,10 +338,23 @@ export function binsFromFootprint(weights, values, numBins) {
if (numBins == null || numBins < 3) if (numBins == null || numBins < 3)
numBins = 3 numBins = 3
let scopeWeights
switch (scope) {
case 'core':
scopeWeights = weights.coreHours
break
case 'accelerator':
scopeWeights = weights.accHours
break
default: // every other scope: use 'node'
scopeWeights = weights.nodeHours
}
const bins = new Array(numBins).fill(0) const bins = new Array(numBins).fill(0)
for (let i = 0; i < values.length; i++) for (let i = 0; i < values.length; i++)
bins[Math.floor(((values[i] - min) / (max - min)) * numBins)] += weights ? weights[i] : 1 bins[Math.floor(((values[i] - min) / (max - min)) * numBins)] += scopeWeights ? scopeWeights[i] : 1
// Manual Canvas Original
// return { // return {
// label: idx => { // label: idx => {
// let start = min + (idx / numBins) * (max - min) // let start = min + (idx / numBins) * (max - min)
@ -355,14 +368,13 @@ export function binsFromFootprint(weights, values, numBins) {
return { return {
bins: bins.map((count, idx) => ({ bins: bins.map((count, idx) => ({
value: idx => { // Get rounded down next integer to bins' Start-Stop Mean Value value: idx => { // Use bins' max value instead of mean
let start = min + (idx / numBins) * (max - min) // let start = min + (idx / numBins) * (max - min)
let stop = min + ((idx + 1) / numBins) * (max - min) let stop = min + ((idx + 1) / numBins) * (max - min)
return `${formatNumber(Math.floor((start+stop)/2))}` // return `${formatNumber(Math.floor((start+stop)/2))}`
return Math.floor(stop)
}, },
count: count count: count
})), }))
min: min,
max: max
} }
} }