diff --git a/internal/graph/util.go b/internal/graph/util.go index b61bcc7..3a2c3b1 100644 --- a/internal/graph/util.go +++ b/internal/graph/util.go @@ -6,7 +6,6 @@ package graph import ( "context" - "errors" "fmt" "math" @@ -33,7 +32,7 @@ func (r *queryResolver) rooflineHeatmap( return nil, err } if len(jobs) > MAX_JOBS_FOR_ANALYSIS { - return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS) + return nil, fmt.Errorf("GRAPH/UTIL > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS) } fcols, frows := float64(cols), float64(rows) @@ -50,20 +49,24 @@ func (r *queryResolver) rooflineHeatmap( jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx) if err != nil { - log.Error("Error while loading metrics for roofline") + log.Errorf("Error while loading roofline metrics for job %d", job.ID) return nil, err } flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"] if flops_ == nil && membw_ == nil { - return nil, fmt.Errorf("GRAPH/STATS > 'flops_any' or 'mem_bw' missing for job %d", job.ID) + log.Infof("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID) + continue + // return nil, fmt.Errorf("GRAPH/UTIL > 'flops_any' or 'mem_bw' missing for job %d", job.ID) } flops, ok1 := flops_["node"] membw, ok2 := membw_["node"] if !ok1 || !ok2 { + log.Info("rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level") + continue // TODO/FIXME: - return nil, errors.New("GRAPH/STATS > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level") + // return nil, errors.New("GRAPH/UTIL > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level") } for n := 0; n < len(flops.Series); n++ { @@ -99,7 +102,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF return nil, err } if len(jobs) > MAX_JOBS_FOR_ANALYSIS { - return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS) + return nil, fmt.Errorf("GRAPH/UTIL > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS) } avgs := make([][]schema.Float, len(metrics)) diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go index cfaa6fd..4874975 100644 --- a/internal/metricdata/cc-metric-store.go +++ b/internal/metricdata/cc-metric-store.go @@ -533,7 +533,9 @@ func (ccms *CCMetricStore) LoadStats( metric := ccms.toLocalName(query.Metric) data := res[0] if data.Error != nil { - return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) + log.Infof("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) + continue + // return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) } metricdata, ok := stats[metric] @@ -543,7 +545,9 @@ func (ccms *CCMetricStore) LoadStats( } if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() { - return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN") + log.Infof("fetching %s for node %s failed: one of avg/min/max is NaN", metric, query.Hostname) + continue + // return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN") } metricdata[query.Hostname] = schema.MetricStatistics{ diff --git a/web/frontend/src/utils.js b/web/frontend/src/utils.js index 5e9cdae..0650916 100644 --- a/web/frontend/src/utils.js +++ b/web/frontend/src/utils.js @@ -350,31 +350,16 @@ export function binsFromFootprint(weights, scope, values, numBins) { scopeWeights = weights.nodeHours } - const bins = new Array(numBins).fill(0) + const rawBins = new Array(numBins).fill(0) for (let i = 0; i < values.length; i++) - bins[Math.floor(((values[i] - min) / (max - min)) * numBins)] += scopeWeights ? scopeWeights[i] : 1 + rawBins[Math.floor(((values[i] - min) / (max - min)) * numBins)] += scopeWeights ? scopeWeights[i] : 1 - // Manual Canvas Original - // return { - // label: idx => { - // let start = min + (idx / numBins) * (max - min) - // let stop = min + ((idx + 1) / numBins) * (max - min) - // return `${formatNumber(start)} - ${formatNumber(stop)}` - // }, - // bins: bins.map((count, idx) => ({ value: idx, count: count })), - // min: min, - // max: max - // } + const bins = rawBins.map((count, idx) => ({ + value: Math.floor(min + ((idx + 1) / numBins) * (max - min)), + count: count + })) return { - bins: bins.map((count, idx) => ({ - value: idx => { // Use bins' max value instead of mean - // let start = min + (idx / numBins) * (max - min) - let stop = min + ((idx + 1) / numBins) * (max - min) - // return `${formatNumber(Math.floor((start+stop)/2))}` - return Math.floor(stop) - }, - count: count - })) + bins: bins } }