mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-01-23 18:09:06 +01:00
add dedicated nodeListData handling to metricData interface
This commit is contained in:
parent
1ee367d7be
commit
e871703724
@ -15,6 +15,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
@ -43,6 +44,7 @@ type CCMetricStore struct {
|
|||||||
type ApiQueryRequest struct {
|
type ApiQueryRequest struct {
|
||||||
Cluster string `json:"cluster"`
|
Cluster string `json:"cluster"`
|
||||||
Queries []ApiQuery `json:"queries"`
|
Queries []ApiQuery `json:"queries"`
|
||||||
|
NodeQuery NodeQuery `json:"node-query"`
|
||||||
ForAllNodes []string `json:"for-all-nodes"`
|
ForAllNodes []string `json:"for-all-nodes"`
|
||||||
From int64 `json:"from"`
|
From int64 `json:"from"`
|
||||||
To int64 `json:"to"`
|
To int64 `json:"to"`
|
||||||
@ -61,6 +63,19 @@ type ApiQuery struct {
|
|||||||
Aggregate bool `json:"aggreg"`
|
Aggregate bool `json:"aggreg"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NodeQuery describes a single multi-node metric query. It is embedded in
// ApiQueryRequest and serialized under the JSON key "node-query".
//
// NOTE(review): the field descriptions below follow the field names and the
// planning notes of the node list view; confirm against the cc-metric-store
// API before relying on them.
type NodeQuery struct {
	// Optional type / subtype selectors; omitted from JSON when nil.
	Type    *string `json:"type,omitempty"`
	SubType *string `json:"subtype,omitempty"`
	// Metrics presumably lists the metric names to query.
	Metrics []string `json:"metrics"`
	// NodeFilter is presumably a (partial) hostname; empty selects all nodes.
	NodeFilter string `json:"node-filter"`
	// Resolution is presumably the requested resolution for all returned data.
	Resolution int `json:"resolution"`
	// Optional id lists for Type / SubType; omitted from JSON when empty.
	TypeIds    []string `json:"type-ids,omitempty"`
	SubTypeIds []string `json:"subtype-ids,omitempty"`
	Aggregate  bool     `json:"aggreg"`
	// Paging parameters: page number and number of items per page.
	Page         int `json:"page"`
	ItemsPerPage int `json:"items-per-page"`
}
|
||||||
|
|
||||||
type ApiQueryResponse struct {
|
type ApiQueryResponse struct {
|
||||||
Queries []ApiQuery `json:"queries,omitempty"`
|
Queries []ApiQuery `json:"queries,omitempty"`
|
||||||
Results [][]ApiMetricData `json:"results"`
|
Results [][]ApiMetricData `json:"results"`
|
||||||
@ -211,7 +226,6 @@ func (ccms *CCMetricStore) LoadData(
|
|||||||
}
|
}
|
||||||
|
|
||||||
jobMetric, ok := jobData[metric][scope]
|
jobMetric, ok := jobData[metric][scope]
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
jobMetric = &schema.JobMetric{
|
jobMetric = &schema.JobMetric{
|
||||||
Unit: mc.Unit,
|
Unit: mc.Unit,
|
||||||
@ -235,8 +249,7 @@ func (ccms *CCMetricStore) LoadData(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
||||||
// TODO: use schema.Float instead of float64?
|
// "schema.Float()" because regular float64 can not be JSONed when NaN.
|
||||||
// This is done because regular float64 can not be JSONed when NaN.
|
|
||||||
res.Avg = schema.Float(0)
|
res.Avg = schema.Float(0)
|
||||||
res.Min = schema.Float(0)
|
res.Min = schema.Float(0)
|
||||||
res.Max = schema.Float(0)
|
res.Max = schema.Float(0)
|
||||||
@ -693,6 +706,427 @@ func (ccms *CCMetricStore) LoadNodeData(
|
|||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ccms *CCMetricStore) LoadNodeListData(
|
||||||
|
cluster, subCluster, nodeFilter string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
from, to time.Time,
|
||||||
|
page model.PageRequest,
|
||||||
|
ctx context.Context,
|
||||||
|
) (map[string]map[string]map[schema.MetricScope]*schema.JobMetric, error) {
|
||||||
|
|
||||||
|
// 1) Get list of all nodes
|
||||||
|
var nodes []string
|
||||||
|
if subCluster != "" {
|
||||||
|
scNodes := archive.NodeLists[cluster][subCluster]
|
||||||
|
nodes = scNodes.PrintList()
|
||||||
|
} else {
|
||||||
|
subClusterNodeLists := archive.NodeLists[cluster]
|
||||||
|
for _, nodeList := range subClusterNodeLists {
|
||||||
|
nodes = append(nodes, nodeList.PrintList()...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf(">> SEE HERE: NODES (All)! %v (Len: %d)", nodes, len(nodes))
|
||||||
|
|
||||||
|
// 2) Filter nodes
|
||||||
|
if nodeFilter != "" {
|
||||||
|
filteredNodes := []string{}
|
||||||
|
for _, node := range nodes {
|
||||||
|
if strings.Contains(node, nodeFilter) {
|
||||||
|
filteredNodes = append(filteredNodes, node)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nodes = filteredNodes
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf(">> SEE HERE: NODES (Filtered)! %v (Len: %d)", nodes, len(nodes))
|
||||||
|
|
||||||
|
// 3) Apply paging
|
||||||
|
if len(nodes) > page.ItemsPerPage {
|
||||||
|
start := (page.Page - 1) * page.ItemsPerPage
|
||||||
|
end := start + page.ItemsPerPage
|
||||||
|
if end > len(nodes) {
|
||||||
|
end = len(nodes)
|
||||||
|
}
|
||||||
|
nodes = nodes[start:end]
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf(">> SEE HERE: NODES (Paged)! %v (Len: %d)", nodes, len(nodes))
|
||||||
|
|
||||||
|
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while building queries")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
req := ApiQueryRequest{
|
||||||
|
Cluster: cluster,
|
||||||
|
Queries: queries,
|
||||||
|
From: from.Unix(),
|
||||||
|
To: to.Unix(),
|
||||||
|
WithStats: true,
|
||||||
|
WithData: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
resBody, err := ccms.doRequest(ctx, &req)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var errors []string
|
||||||
|
data := make(map[string]map[string]map[schema.MetricScope]*schema.JobMetric)
|
||||||
|
for i, row := range resBody.Results {
|
||||||
|
var query ApiQuery
|
||||||
|
if resBody.Queries != nil {
|
||||||
|
query = resBody.Queries[i]
|
||||||
|
} else {
|
||||||
|
query = req.Queries[i]
|
||||||
|
}
|
||||||
|
// qdata := res[0]
|
||||||
|
metric := ccms.toLocalName(query.Metric)
|
||||||
|
scope := assignedScope[i]
|
||||||
|
mc := archive.GetMetricConfig(cluster, metric)
|
||||||
|
|
||||||
|
res := row[0].Resolution
|
||||||
|
if res == 0 {
|
||||||
|
res = mc.Timestep
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeMetric, ok := data[query.Hostname][metric][scope]
|
||||||
|
if !ok {
|
||||||
|
nodeMetric = &schema.JobMetric{
|
||||||
|
Unit: mc.Unit,
|
||||||
|
Timestep: res,
|
||||||
|
Series: make([]schema.Series, 0),
|
||||||
|
}
|
||||||
|
data[query.Hostname][metric][scope] = nodeMetric
|
||||||
|
}
|
||||||
|
|
||||||
|
for ndx, res := range row {
|
||||||
|
if res.Error != nil {
|
||||||
|
/* Build list for "partial errors", if any */
|
||||||
|
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
id := (*string)(nil)
|
||||||
|
if query.Type != nil {
|
||||||
|
id = new(string)
|
||||||
|
*id = query.TypeIds[ndx]
|
||||||
|
}
|
||||||
|
|
||||||
|
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
||||||
|
// "schema.Float()" because regular float64 can not be JSONed when NaN.
|
||||||
|
res.Avg = schema.Float(0)
|
||||||
|
res.Min = schema.Float(0)
|
||||||
|
res.Max = schema.Float(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeMetric.Series = append(nodeMetric.Series, schema.Series{
|
||||||
|
Hostname: query.Hostname,
|
||||||
|
Id: id,
|
||||||
|
Statistics: schema.MetricStatistics{
|
||||||
|
Avg: float64(res.Avg),
|
||||||
|
Min: float64(res.Min),
|
||||||
|
Max: float64(res.Max),
|
||||||
|
},
|
||||||
|
Data: res.Data,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errors) != 0 {
|
||||||
|
/* Returns list of "partial errors" */
|
||||||
|
return data, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf(">> SEE HERE: DATA (Final)! %v (Len: %d)", data, len(data))
|
||||||
|
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ccms *CCMetricStore) buildNodeQueries(
|
||||||
|
cluster string,
|
||||||
|
subCluster string,
|
||||||
|
nodes []string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
) ([]ApiQuery, []schema.MetricScope, error) {
|
||||||
|
|
||||||
|
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
|
||||||
|
assignedScope := []schema.MetricScope{}
|
||||||
|
|
||||||
|
// Get Topol before loop if subCluster given
|
||||||
|
var subClusterTopol *schema.SubCluster
|
||||||
|
var scterr error
|
||||||
|
if subCluster != "" {
|
||||||
|
subClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster)
|
||||||
|
if scterr != nil {
|
||||||
|
// TODO: Log
|
||||||
|
return nil, nil, scterr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, metric := range metrics {
|
||||||
|
remoteName := ccms.toRemoteName(metric)
|
||||||
|
mc := archive.GetMetricConfig(cluster, metric)
|
||||||
|
if mc == nil {
|
||||||
|
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster)
|
||||||
|
log.Infof("metric '%s' is not specified for cluster '%s'", metric, cluster)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid duplicates...
|
||||||
|
handledScopes := make([]schema.MetricScope, 0, 3)
|
||||||
|
|
||||||
|
scopesLoop:
|
||||||
|
for _, requestedScope := range scopes {
|
||||||
|
nativeScope := mc.Scope
|
||||||
|
|
||||||
|
scope := nativeScope.Max(requestedScope)
|
||||||
|
for _, s := range handledScopes {
|
||||||
|
if scope == s {
|
||||||
|
continue scopesLoop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
handledScopes = append(handledScopes, scope)
|
||||||
|
|
||||||
|
for _, hostname := range nodes {
|
||||||
|
|
||||||
|
// If no subCluster given, get it by node
|
||||||
|
if subCluster == "" {
|
||||||
|
subClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname)
|
||||||
|
if scnerr != nil {
|
||||||
|
return nil, nil, scnerr
|
||||||
|
}
|
||||||
|
subClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName)
|
||||||
|
if scterr != nil {
|
||||||
|
return nil, nil, scterr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always full node hwthread id list, no partial queries expected -> Use "topology.Node" directly where applicable
|
||||||
|
// Always full accelerator id list, no partial queries expected -> Use "acceleratorIds" directly where applicable
|
||||||
|
topology := subClusterTopol.Topology
|
||||||
|
acceleratorIds := topology.GetAcceleratorIDs()
|
||||||
|
|
||||||
|
// Moved check here if metric matches hardware specs
|
||||||
|
if nativeScope == schema.MetricScopeAccelerator && len(acceleratorIds) == 0 {
|
||||||
|
continue scopesLoop
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
|
||||||
|
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
|
||||||
|
if scope != schema.MetricScopeAccelerator {
|
||||||
|
// Skip all other catched cases
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &acceleratorString,
|
||||||
|
TypeIds: acceleratorIds,
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accelerator -> Node
|
||||||
|
if nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode {
|
||||||
|
if len(acceleratorIds) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &acceleratorString,
|
||||||
|
TypeIds: acceleratorIds,
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// HWThread -> HWThead
|
||||||
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &hwthreadString,
|
||||||
|
TypeIds: intToStringSlice(topology.Node),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// HWThread -> Core
|
||||||
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {
|
||||||
|
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||||
|
for _, core := range cores {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &hwthreadString,
|
||||||
|
TypeIds: intToStringSlice(topology.Core[core]),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// HWThread -> Socket
|
||||||
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {
|
||||||
|
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||||
|
for _, socket := range sockets {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &hwthreadString,
|
||||||
|
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// HWThread -> Node
|
||||||
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &hwthreadString,
|
||||||
|
TypeIds: intToStringSlice(topology.Node),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Core -> Core
|
||||||
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
|
||||||
|
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &coreString,
|
||||||
|
TypeIds: intToStringSlice(cores),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Core -> Node
|
||||||
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
|
||||||
|
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &coreString,
|
||||||
|
TypeIds: intToStringSlice(cores),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// MemoryDomain -> MemoryDomain
|
||||||
|
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
|
||||||
|
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &memoryDomainString,
|
||||||
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// MemoryDoman -> Node
|
||||||
|
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
|
||||||
|
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &memoryDomainString,
|
||||||
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Socket -> Socket
|
||||||
|
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
|
||||||
|
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &socketString,
|
||||||
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Socket -> Node
|
||||||
|
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
|
||||||
|
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &socketString,
|
||||||
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Node -> Node
|
||||||
|
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return queries, assignedScope, nil
|
||||||
|
}
|
||||||
|
|
||||||
func intToStringSlice(is []int) []string {
|
func intToStringSlice(is []int) []string {
|
||||||
ss := make([]string, len(is))
|
ss := make([]string, len(is))
|
||||||
for i, x := range is {
|
for i, x := range is {
|
||||||
|
@ -13,6 +13,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
@ -312,3 +313,19 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
|
|||||||
|
|
||||||
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
|
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (idb *InfluxDBv2DataRepository) LoadNodeListData(
|
||||||
|
cluster, subCluster, nodeFilter string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
from, to time.Time,
|
||||||
|
page model.PageRequest,
|
||||||
|
ctx context.Context,
|
||||||
|
) (map[string]map[string]map[schema.MetricScope]*schema.JobMetric, error) {
|
||||||
|
|
||||||
|
// TODO : Implement to be used in NodeList-View
|
||||||
|
log.Infof("LoadNodeListData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodeFilter %v, scopes %v", cluster, metrics, nodeFilter, scopes)
|
||||||
|
|
||||||
|
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
|
||||||
|
}
|
||||||
|
@ -11,6 +11,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
)
|
)
|
||||||
@ -26,8 +27,11 @@ type MetricDataRepository interface {
|
|||||||
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
|
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
|
||||||
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
|
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
|
||||||
|
|
||||||
// Return a map of hosts to a map of metrics at the requested scopes for that node.
|
// Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node.
|
||||||
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
|
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
|
||||||
|
|
||||||
|
// Return a map of hosts to a map of metrics to a map of scopes for multiple nodes.
|
||||||
|
LoadNodeListData(cluster, subCluster, nodeFilter string, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, page model.PageRequest, ctx context.Context) (map[string]map[string]map[schema.MetricScope]*schema.JobMetric, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
|
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
|
||||||
|
@ -20,6 +20,7 @@ import (
|
|||||||
"text/template"
|
"text/template"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
@ -446,3 +447,19 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
|||||||
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
|
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
|
||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pdb *PrometheusDataRepository) LoadNodeListData(
|
||||||
|
cluster, subCluster, nodeFilter string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
from, to time.Time,
|
||||||
|
page model.PageRequest,
|
||||||
|
ctx context.Context,
|
||||||
|
) (map[string]map[string]map[schema.MetricScope]*schema.JobMetric, error) {
|
||||||
|
|
||||||
|
// TODO : Implement to be used in NodeList-View
|
||||||
|
log.Infof("LoadNodeListData unimplemented for PrometheusDataRepository, Args: cluster %s, metrics %v, nodeFilter %v, scopes %v", cluster, metrics, nodeFilter, scopes)
|
||||||
|
|
||||||
|
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for PrometheusDataRepository")
|
||||||
|
}
|
||||||
|
@ -9,6 +9,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -50,6 +51,19 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
|
|||||||
panic("TODO")
|
panic("TODO")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LoadNodeListData is not implemented for the test repository; calling it
// panics.
func (tmdr *TestMetricDataRepository) LoadNodeListData(
	cluster, subCluster, nodeFilter string,
	metrics []string,
	scopes []schema.MetricScope,
	resolution int,
	from, to time.Time,
	page model.PageRequest,
	ctx context.Context,
) (map[string]map[string]map[schema.MetricScope]*schema.JobMetric, error) {
	panic("TODO")
}
|
||||||
|
|
||||||
func DeepCopy(jd_temp schema.JobData) schema.JobData {
|
func DeepCopy(jd_temp schema.JobData) schema.JobData {
|
||||||
var jd schema.JobData
|
var jd schema.JobData
|
||||||
|
|
||||||
|
@ -15,12 +15,12 @@ import (
|
|||||||
var (
|
var (
|
||||||
Clusters []*schema.Cluster
|
Clusters []*schema.Cluster
|
||||||
GlobalMetricList []*schema.GlobalMetricListItem
|
GlobalMetricList []*schema.GlobalMetricListItem
|
||||||
nodeLists map[string]map[string]NodeList
|
NodeLists map[string]map[string]NodeList
|
||||||
)
|
)
|
||||||
|
|
||||||
func initClusterConfig() error {
|
func initClusterConfig() error {
|
||||||
Clusters = []*schema.Cluster{}
|
Clusters = []*schema.Cluster{}
|
||||||
nodeLists = map[string]map[string]NodeList{}
|
NodeLists = map[string]map[string]NodeList{}
|
||||||
metricLookup := make(map[string]schema.GlobalMetricListItem)
|
metricLookup := make(map[string]schema.GlobalMetricListItem)
|
||||||
|
|
||||||
for _, c := range ar.GetClusters() {
|
for _, c := range ar.GetClusters() {
|
||||||
@ -109,7 +109,7 @@ func initClusterConfig() error {
|
|||||||
|
|
||||||
Clusters = append(Clusters, cluster)
|
Clusters = append(Clusters, cluster)
|
||||||
|
|
||||||
nodeLists[cluster.Name] = make(map[string]NodeList)
|
NodeLists[cluster.Name] = make(map[string]NodeList)
|
||||||
for _, sc := range cluster.SubClusters {
|
for _, sc := range cluster.SubClusters {
|
||||||
if sc.Nodes == "*" {
|
if sc.Nodes == "*" {
|
||||||
continue
|
continue
|
||||||
@ -119,7 +119,7 @@ func initClusterConfig() error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
|
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
|
||||||
}
|
}
|
||||||
nodeLists[cluster.Name][sc.Name] = nl
|
NodeLists[cluster.Name][sc.Name] = nl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -187,7 +187,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
host0 := job.Resources[0].Hostname
|
host0 := job.Resources[0].Hostname
|
||||||
for sc, nl := range nodeLists[job.Cluster] {
|
for sc, nl := range NodeLists[job.Cluster] {
|
||||||
if nl != nil && nl.Contains(host0) {
|
if nl != nil && nl.Contains(host0) {
|
||||||
job.SubCluster = sc
|
job.SubCluster = sc
|
||||||
return nil
|
return nil
|
||||||
@ -203,7 +203,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
||||||
for sc, nl := range nodeLists[cluster] {
|
for sc, nl := range NodeLists[cluster] {
|
||||||
if nl != nil && nl.Contains(hostname) {
|
if nl != nil && nl.Contains(hostname) {
|
||||||
return sc, nil
|
return sc, nil
|
||||||
}
|
}
|
||||||
|
@ -194,7 +194,17 @@ func (topo *Topology) GetAcceleratorID(id int) (string, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (topo *Topology) GetAcceleratorIDs() ([]int, error) {
|
// Return list of hardware (string) accelerator IDs
|
||||||
|
func (topo *Topology) GetAcceleratorIDs() []string {
|
||||||
|
accels := make([]string, 0)
|
||||||
|
for _, accel := range topo.Accelerators {
|
||||||
|
accels = append(accels, accel.ID)
|
||||||
|
}
|
||||||
|
return accels
|
||||||
|
}
|
||||||
|
|
||||||
|
// Outdated? Or: Return indices of accelerators in parent array?
|
||||||
|
func (topo *Topology) GetAcceleratorIDsAsInt() ([]int, error) {
|
||||||
accels := make([]int, 0)
|
accels := make([]int, 0)
|
||||||
for _, accel := range topo.Accelerators {
|
for _, accel := range topo.Accelerators {
|
||||||
id, err := strconv.Atoi(accel.ID)
|
id, err := strconv.Atoi(accel.ID)
|
||||||
|
@ -58,12 +58,30 @@
|
|||||||
let selectedMetrics = ccconfig[`node_list_selectedMetrics:${cluster}`] || [ccconfig.system_view_selectedMetric];
|
let selectedMetrics = ccconfig[`node_list_selectedMetrics:${cluster}`] || [ccconfig.system_view_selectedMetric];
|
||||||
let isMetricsSelectionOpen = false;
|
let isMetricsSelectionOpen = false;
|
||||||
|
|
||||||
// Todo: Add Idle State Filter (== No allocated Jobs) [Frontend?]
|
// New Jan 2025
|
||||||
|
/*
|
||||||
|
- Toss "add_resolution_node_systems" branch OR include/merge here if resolutions in node-overview useful
|
||||||
|
- Add single object field for nodeData query to CCMS query: "nodeDataQuery"
|
||||||
|
- Contains following fields:
|
||||||
|
- metrics: [String] // List of metrics to query
|
||||||
|
- page: Int // Page number
|
||||||
|
- itemsPerPage: Int // Number of items per page
|
||||||
|
- resolution: Int // Requested Resolution for all returned data
|
||||||
|
- nodeFilter: String // (partial) hostname string
|
||||||
|
- With this, all use-cases except "scopes" can be handled, if nodeFilter is "" (empty) all nodes are returned by default
|
||||||
|
- Is basically a stepped up version of the "forAllNodes" property, as "these metrics for all nodes" is still the base idea
|
||||||
|
- Required: Handling in CCMS, co-develop in close contact with Aditya
|
||||||
|
- Question: How and where to handle scope queries? (e.g. "node" vs "accelerator") -> NOT handled in ccms!
|
||||||
|
- Notes: "Sorting" as use-case ignored for now, probably default to alphanumerical on hostnames of cluster
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Todo: Add Idle State Filter (== No allocated Jobs) [Frontend?] : Cannot be handled by CCMS, requires secondary job query and refiltering of visible nodes
|
||||||
// Todo: NodeList: At least Accelerator Scope ... "Show Detail" Switch?
|
// Todo: NodeList: At least Accelerator Scope ... "Show Detail" Switch?
|
||||||
// Todo: Rework GQL Query: Add Paging (Scrollable / Paging Configbar), Add Nodes Filter (see jobs-onthefly-userfilter: incompatible with ccms!), add scopes
|
// Todo: Rework GQL Query: Add Paging (Scrollable / Paging Configbar), Add Nodes Filter (see jobs-onthefly-userfilter: incompatible with ccms!), add scopes
|
||||||
// All three issues need either new features in ccms (paging, filter) or new implementation of ccms node queries with scopes (currently very job-specific)
|
// All three issues need either new features in ccms (paging, filter) or new implementation of ccms node queries with scopes (currently very job-specific)
|
||||||
// Todo: Review performance // observed high client-side load frequency
|
// Todo: Review performance // observed high client-side load frequency
|
||||||
// Is Svelte {#each} -> <MetricPlot/> -> onMount() related : Cannot be skipped ...
|
// Is Svelte {#each} -> <MetricPlot/> -> onMount() related : Cannot be skipped ...
|
||||||
|
// Will be solved as soon as dedicated paging, itemLimits and filtering is implemented in ccms
|
||||||
// ==> Skip for Q4/24 Release, build from ccms upgrade (paging/filter) up
|
// ==> Skip for Q4/24 Release, build from ccms upgrade (paging/filter) up
|
||||||
|
|
||||||
const client = getContextClient();
|
const client = getContextClient();
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
- `cluster String`: Cluster name of the parent job / data
|
- `cluster String`: Cluster name of the parent job / data
|
||||||
- `subCluster String`: Name of the subCluster of the parent job
|
- `subCluster String`: Name of the subCluster of the parent job
|
||||||
- `isShared Bool?`: If this job used shared resources; will adapt threshold indicators accordingly [Default: false]
|
- `isShared Bool?`: If this job used shared resources; will adapt threshold indicators accordingly [Default: false]
|
||||||
- `forNode Bool?`: If this plot is used for node data display; will ren[data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)](https://github.com/ClusterCockpit/cc-backend/blob/9fe7cdca9215220a19930779a60c8afc910276a3/internal/graph/schema.resolvers.go#L391-L392)der x-axis as negative time with $now as maximum [Default: false]
|
- `forNode Bool?`: If this plot is used for node data display; will render x-axis as negative time with $now as maximum [Default: false]
|
||||||
- `numhwthreads Number?`: Number of job HWThreads [Default: 0]
|
- `numhwthreads Number?`: Number of job HWThreads [Default: 0]
|
||||||
- `numaccs Number?`: Number of job Accelerators [Default: 0]
|
- `numaccs Number?`: Number of job Accelerators [Default: 0]
|
||||||
- `zoomState Object?`: The last zoom state to preserve on user zoom [Default: null]
|
- `zoomState Object?`: The last zoom state to preserve on user zoom [Default: null]
|
||||||
|
Loading…
Reference in New Issue
Block a user