Review and improve node repo queries

This commit is contained in:
2026-02-12 15:45:15 +01:00
parent f016bd4232
commit c15f1117f5
2 changed files with 83 additions and 144 deletions

View File

@@ -38,6 +38,7 @@ CREATE INDEX IF NOT EXISTS nodestates_state_timestamp ON node_state (node_state,
CREATE INDEX IF NOT EXISTS nodestates_health_timestamp ON node_state (health_state, time_stamp); CREATE INDEX IF NOT EXISTS nodestates_health_timestamp ON node_state (health_state, time_stamp);
CREATE INDEX IF NOT EXISTS nodestates_nodeid_state ON node_state (node_id, node_state); CREATE INDEX IF NOT EXISTS nodestates_nodeid_state ON node_state (node_id, node_state);
CREATE INDEX IF NOT EXISTS nodestates_nodeid_health ON node_state (node_id, health_state); CREATE INDEX IF NOT EXISTS nodestates_nodeid_health ON node_state (node_id, health_state);
CREATE INDEX IF NOT EXISTS nodestates_nodeid_timestamp ON node_state (node_id, time_stamp DESC);
-- Add NEW Indices For Increased Amounts of Tags -- Add NEW Indices For Increased Amounts of Tags
CREATE INDEX IF NOT EXISTS tags_jobid ON jobtag (job_id); CREATE INDEX IF NOT EXISTS tags_jobid ON jobtag (job_id);

View File

@@ -52,6 +52,38 @@ func GetNodeRepository() *NodeRepository {
return nodeRepoInstance return nodeRepoInstance
} }
// latestStateCondition returns a squirrel expression that restricts node_state
// rows to the latest per node_id using a correlated subquery.
// Requires the query to join node and node_state tables.
func latestStateCondition() sq.Sqlizer {
return sq.Expr(
"node_state.id = (SELECT ns2.id FROM node_state ns2 WHERE ns2.node_id = node.id ORDER BY ns2.time_stamp DESC LIMIT 1)",
)
}
// applyNodeFilters applies common NodeFilter conditions to a query that joins
// the node and node_state tables with latestStateCondition.
func applyNodeFilters(query sq.SelectBuilder, filters []*model.NodeFilter) sq.SelectBuilder {
for _, f := range filters {
if f.Cluster != nil {
query = buildStringCondition("node.cluster", f.Cluster, query)
}
if f.SubCluster != nil {
query = buildStringCondition("node.subcluster", f.SubCluster, query)
}
if f.Hostname != nil {
query = buildStringCondition("node.hostname", f.Hostname, query)
}
if f.SchedulerState != nil {
query = query.Where("node_state.node_state = ?", f.SchedulerState)
}
if f.HealthState != nil {
query = query.Where("node_state.health_state = ?", f.HealthState)
}
}
return query
}
func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[string]string, error) { func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[string]string, error) {
start := time.Now() start := time.Now()
@@ -82,17 +114,16 @@ func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[str
func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) { func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) {
node := &schema.Node{} node := &schema.Node{}
var timestamp int if err := sq.Select("node.hostname", "node.cluster", "node.subcluster",
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", "node_state.node_state", "node_state.health_state").
"node_state.health_state", "MAX(node_state.time_stamp) as time"). From("node").
From("node_state"). Join("node_state ON node_state.node_id = node.id").
Join("node ON node_state.node_id = node.id"). Where(latestStateCondition()).
Where("node.hostname = ?", hostname). Where("node.hostname = ?", hostname).
Where("node.cluster = ?", cluster). Where("node.cluster = ?", cluster).
GroupBy("node_state.node_id").
RunWith(r.DB). RunWith(r.DB).
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, &timestamp); err != nil { QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
cclog.Warnf("Error while querying node '%s' at time '%d' from database: %v", hostname, timestamp, err) cclog.Warnf("Error while querying node '%s' from database: %v", hostname, err)
return nil, err return nil, err
} }
@@ -111,16 +142,15 @@ func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool)
func (r *NodeRepository) GetNodeByID(id int64, withMeta bool) (*schema.Node, error) { func (r *NodeRepository) GetNodeByID(id int64, withMeta bool) (*schema.Node, error) {
node := &schema.Node{} node := &schema.Node{}
var timestamp int if err := sq.Select("node.hostname", "node.cluster", "node.subcluster",
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", "node_state.node_state", "node_state.health_state").
"node_state.health_state", "MAX(node_state.time_stamp) as time"). From("node").
From("node_state"). Join("node_state ON node_state.node_id = node.id").
Join("node ON node_state.node_id = node.id"). Where(latestStateCondition()).
Where("node.id = ?", id). Where("node.id = ?", id).
GroupBy("node_state.node_id").
RunWith(r.DB). RunWith(r.DB).
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, &timestamp); err != nil { QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
cclog.Warnf("Error while querying node ID '%d' at time '%d' from database: %v", id, timestamp, err) cclog.Warnf("Error while querying node ID '%d' from database: %v", id, err)
return nil, err return nil, err
} }
@@ -313,40 +343,17 @@ func (r *NodeRepository) QueryNodes(
order *model.OrderByInput, // Currently unused! order *model.OrderByInput, // Currently unused!
) ([]*schema.Node, error) { ) ([]*schema.Node, error) {
query, qerr := AccessCheck(ctx, query, qerr := AccessCheck(ctx,
sq.Select("hostname", "cluster", "subcluster", "node_state", "health_state", "MAX(time_stamp) as time"). sq.Select("node.hostname", "node.cluster", "node.subcluster",
"node_state.node_state", "node_state.health_state").
From("node"). From("node").
Join("node_state ON node_state.node_id = node.id")) Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()))
if qerr != nil { if qerr != nil {
return nil, qerr return nil, qerr
} }
for _, f := range filters { query = applyNodeFilters(query, filters)
if f.Cluster != nil { query = query.OrderBy("node.hostname ASC")
query = buildStringCondition("cluster", f.Cluster, query)
}
if f.SubCluster != nil {
query = buildStringCondition("subcluster", f.SubCluster, query)
}
if f.Hostname != nil {
query = buildStringCondition("hostname", f.Hostname, query)
}
if f.SchedulerState != nil {
query = query.Where("node_state = ?", f.SchedulerState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
// TODO: Hardcoded TimeDiff Suboptimal - Use Config Option?
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 300)})
}
if f.HealthState != nil {
query = query.Where("health_state = ?", f.HealthState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
// TODO: Hardcoded TimeDiff Suboptimal - Use Config Option?
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 300)})
}
}
query = query.GroupBy("node_id").OrderBy("hostname ASC")
if page != nil && page.ItemsPerPage != -1 { if page != nil && page.ItemsPerPage != -1 {
limit := uint64(page.ItemsPerPage) limit := uint64(page.ItemsPerPage)
@@ -363,11 +370,10 @@ func (r *NodeRepository) QueryNodes(
nodes := make([]*schema.Node, 0) nodes := make([]*schema.Node, 0)
for rows.Next() { for rows.Next() {
node := schema.Node{} node := schema.Node{}
var timestamp int
if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster, if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
&node.NodeState, &node.HealthState, &timestamp); err != nil { &node.NodeState, &node.HealthState); err != nil {
rows.Close() rows.Close()
cclog.Warnf("Error while scanning rows (QueryNodes) at time '%d'", timestamp) cclog.Warn("Error while scanning rows (QueryNodes)")
return nil, err return nil, err
} }
nodes = append(nodes, &node) nodes = append(nodes, &node)
@@ -377,74 +383,39 @@ func (r *NodeRepository) QueryNodes(
} }
// CountNodes returns the total matched nodes based on a node filter. It always operates // CountNodes returns the total matched nodes based on a node filter. It always operates
// on the last state (largest timestamp). // on the last state (largest timestamp) per node.
func (r *NodeRepository) CountNodes( func (r *NodeRepository) CountNodes(
ctx context.Context, ctx context.Context,
filters []*model.NodeFilter, filters []*model.NodeFilter,
) (int, error) { ) (int, error) {
query, qerr := AccessCheck(ctx, query, qerr := AccessCheck(ctx,
sq.Select("time_stamp", "count(*) as countRes"). sq.Select("COUNT(*)").
From("node"). From("node").
Join("node_state ON node_state.node_id = node.id")) Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()))
if qerr != nil { if qerr != nil {
return 0, qerr return 0, qerr
} }
for _, f := range filters { query = applyNodeFilters(query, filters)
if f.Cluster != nil {
query = buildStringCondition("cluster", f.Cluster, query)
}
if f.SubCluster != nil {
query = buildStringCondition("subcluster", f.SubCluster, query)
}
if f.Hostname != nil {
query = buildStringCondition("hostname", f.Hostname, query)
}
if f.SchedulerState != nil {
query = query.Where("node_state = ?", f.SchedulerState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
// TODO: Hardcoded TimeDiff Suboptimal - Use Config Option?
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 300)})
}
if f.HealthState != nil {
query = query.Where("health_state = ?", f.HealthState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
// TODO: Hardcoded TimeDiff Suboptimal - Use Config Option?
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 300)})
}
}
query = query.GroupBy("time_stamp").OrderBy("time_stamp DESC").Limit(1) var count int
if err := query.RunWith(r.stmtCache).QueryRow().Scan(&count); err != nil {
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
queryString, queryVars, _ := query.ToSql() queryString, queryVars, _ := query.ToSql()
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err) cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
return 0, err return 0, err
} }
var totalNodes int return count, nil
for rows.Next() {
var timestamp int
if err := rows.Scan(&timestamp, &totalNodes); err != nil {
rows.Close()
cclog.Warnf("Error while scanning rows (CountNodes) at time '%d'", timestamp)
return 0, err
}
}
return totalNodes, nil
} }
func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
q := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", q := sq.Select("node.hostname", "node.cluster", "node.subcluster",
"node_state.health_state", "MAX(node_state.time_stamp) as time"). "node_state.node_state", "node_state.health_state").
From("node"). From("node").
Join("node_state ON node_state.node_id = node.id"). Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()).
Where("node.cluster = ?", cluster). Where("node.cluster = ?", cluster).
GroupBy("node_state.node_id").
OrderBy("node.hostname ASC") OrderBy("node.hostname ASC")
rows, err := q.RunWith(r.DB).Query() rows, err := q.RunWith(r.DB).Query()
@@ -456,10 +427,9 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
defer rows.Close() defer rows.Close()
for rows.Next() { for rows.Next() {
node := &schema.Node{} node := &schema.Node{}
var timestamp int
if err := rows.Scan(&node.Hostname, &node.Cluster, if err := rows.Scan(&node.Hostname, &node.Cluster,
&node.SubCluster, &node.NodeState, &node.HealthState, &timestamp); err != nil { &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
cclog.Warnf("Error while scanning node list (ListNodes) at time '%d'", timestamp) cclog.Warn("Error while scanning node list (ListNodes)")
return nil, err return nil, err
} }
@@ -470,11 +440,11 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
} }
func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) { func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
q := sq.Select("node.hostname", "node_state.node_state", "MAX(node_state.time_stamp) as time"). q := sq.Select("node.hostname", "node_state.node_state").
From("node"). From("node").
Join("node_state ON node_state.node_id = node.id"). Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()).
Where("node.cluster = ?", cluster). Where("node.cluster = ?", cluster).
GroupBy("node_state.node_id").
OrderBy("node.hostname ASC") OrderBy("node.hostname ASC")
rows, err := q.RunWith(r.DB).Query() rows, err := q.RunWith(r.DB).Query()
@@ -487,9 +457,8 @@ func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
defer rows.Close() defer rows.Close()
for rows.Next() { for rows.Next() {
var hostname, nodestate string var hostname, nodestate string
var timestamp int if err := rows.Scan(&hostname, &nodestate); err != nil {
if err := rows.Scan(&hostname, &nodestate, &timestamp); err != nil { cclog.Warn("Error while scanning node list (MapNodes)")
cclog.Warnf("Error while scanning node list (MapNodes) at time '%d'", timestamp)
return nil, err return nil, err
} }
@@ -500,33 +469,16 @@ func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
} }
func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeFilter, column string) ([]*model.NodeStates, error) { func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeFilter, column string) ([]*model.NodeStates, error) {
query, qerr := AccessCheck(ctx, sq.Select("hostname", column, "MAX(time_stamp) as time").From("node")) query, qerr := AccessCheck(ctx,
sq.Select(column).
From("node").
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()))
if qerr != nil { if qerr != nil {
return nil, qerr return nil, qerr
} }
query = query.Join("node_state ON node_state.node_id = node.id") query = applyNodeFilters(query, filters)
for _, f := range filters {
if f.Hostname != nil {
query = buildStringCondition("hostname", f.Hostname, query)
}
if f.Cluster != nil {
query = buildStringCondition("cluster", f.Cluster, query)
}
if f.SubCluster != nil {
query = buildStringCondition("subcluster", f.SubCluster, query)
}
if f.SchedulerState != nil {
query = query.Where("node_state = ?", f.SchedulerState)
}
if f.HealthState != nil {
query = query.Where("health_state = ?", f.HealthState)
}
}
// Add Group and Order
query = query.GroupBy("hostname").OrderBy("hostname DESC")
rows, err := query.RunWith(r.stmtCache).Query() rows, err := query.RunWith(r.stmtCache).Query()
if err != nil { if err != nil {
@@ -537,12 +489,10 @@ func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeF
stateMap := map[string]int{} stateMap := map[string]int{}
for rows.Next() { for rows.Next() {
var hostname, state string var state string
var timestamp int if err := rows.Scan(&state); err != nil {
if err := rows.Scan(&hostname, &state, &timestamp); err != nil {
rows.Close() rows.Close()
cclog.Warnf("Error while scanning rows (CountStates) at time '%d'", timestamp) cclog.Warn("Error while scanning rows (CountStates)")
return nil, err return nil, err
} }
@@ -735,26 +685,14 @@ func (r *NodeRepository) GetNodesForList(
} }
} else { } else {
// DB Nodes: Count and Find Next Page // DB Nodes: Count and derive hasNextPage from count
var cerr error var cerr error
countNodes, cerr = r.CountNodes(ctx, queryFilters) countNodes, cerr = r.CountNodes(ctx, queryFilters)
if cerr != nil { if cerr != nil {
cclog.Warn("error while counting node database data (Resolver.NodeMetricsList)") cclog.Warn("error while counting node database data (Resolver.NodeMetricsList)")
return nil, nil, 0, false, cerr return nil, nil, 0, false, cerr
} }
hasNextPage = page.Page*page.ItemsPerPage < countNodes
// Example Page 4 @ 10 IpP : Does item 41 exist?
// Minimal Page 41 @ 1 IpP : If len(result) is 1, Page 5 exists.
nextPage := &model.PageRequest{
ItemsPerPage: 1,
Page: ((page.Page * page.ItemsPerPage) + 1),
}
nextNodes, err := r.QueryNodes(ctx, queryFilters, nextPage, nil) // Order not Used
if err != nil {
cclog.Warn("Error while querying next nodes")
return nil, nil, 0, false, err
}
hasNextPage = len(nextNodes) == 1
} }
// Fallback for non-init'd node table in DB; Ignores stateFilter // Fallback for non-init'd node table in DB; Ignores stateFilter