Update Node table code. Add simple unit test

This commit is contained in:
2025-10-16 12:54:16 +02:00
parent 89055506d6
commit 40398497c2
13 changed files with 678 additions and 296 deletions

View File

@@ -10,7 +10,6 @@ import (
"database/sql"
"encoding/json"
"fmt"
"maps"
"sync"
"time"
@@ -50,90 +49,91 @@ func GetNodeRepository() *NodeRepository {
return nodeRepoInstance
}
func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) {
func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[string]string, error) {
start := time.Now()
cachekey := fmt.Sprintf("metadata:%d", node.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
node.MetaData = cached.(map[string]string)
return node.MetaData, nil
}
if err := sq.Select("node.meta_data").From("node").Where("node.id = ?", node.ID).
RunWith(r.stmtCache).QueryRow().Scan(&node.RawMetaData); err != nil {
RawMetaData := make([]byte, 0)
if err := sq.Select("node.meta_data").From("node").
Where("node.hostname = ?", hostname).
Where("node.cluster = ?", cluster).
RunWith(r.stmtCache).QueryRow().Scan(&RawMetaData); err != nil {
cclog.Warn("Error while scanning for node metadata")
return nil, err
}
if len(node.RawMetaData) == 0 {
if len(RawMetaData) == 0 {
return nil, nil
}
if err := json.Unmarshal(node.RawMetaData, &node.MetaData); err != nil {
MetaData := make(map[string]string)
if err := json.Unmarshal(RawMetaData, &MetaData); err != nil {
cclog.Warn("Error while unmarshaling raw metadata json")
return nil, err
}
r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour)
cclog.Debugf("Timer FetchMetadata %s", time.Since(start))
return node.MetaData, nil
return MetaData, nil
}
func (r *NodeRepository) UpdateMetadata(node *schema.Node, key, val string) (err error) {
cachekey := fmt.Sprintf("metadata:%d", node.ID)
r.cache.Del(cachekey)
if node.MetaData == nil {
if _, err = r.FetchMetadata(node); err != nil {
cclog.Warnf("Error while fetching metadata for node, DB ID '%v'", node.ID)
return err
}
}
//
// func (r *NodeRepository) UpdateMetadata(node *schema.Node, key, val string) (err error) {
// cachekey := fmt.Sprintf("metadata:%d", node.ID)
// r.cache.Del(cachekey)
// if node.MetaData == nil {
// if _, err = r.FetchMetadata(node); err != nil {
// cclog.Warnf("Error while fetching metadata for node, DB ID '%v'", node.ID)
// return err
// }
// }
//
// if node.MetaData != nil {
// cpy := make(map[string]string, len(node.MetaData)+1)
// maps.Copy(cpy, node.MetaData)
// cpy[key] = val
// node.MetaData = cpy
// } else {
// node.MetaData = map[string]string{key: val}
// }
//
// if node.RawMetaData, err = json.Marshal(node.MetaData); err != nil {
// cclog.Warnf("Error while marshaling metadata for node, DB ID '%v'", node.ID)
// return err
// }
//
// if _, err = sq.Update("node").
// Set("meta_data", node.RawMetaData).
// Where("node.id = ?", node.ID).
// RunWith(r.stmtCache).Exec(); err != nil {
// cclog.Warnf("Error while updating metadata for node, DB ID '%v'", node.ID)
// return err
// }
//
// r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour)
// return nil
// }
if node.MetaData != nil {
cpy := make(map[string]string, len(node.MetaData)+1)
maps.Copy(cpy, node.MetaData)
cpy[key] = val
node.MetaData = cpy
} else {
node.MetaData = map[string]string{key: val}
}
if node.RawMetaData, err = json.Marshal(node.MetaData); err != nil {
cclog.Warnf("Error while marshaling metadata for node, DB ID '%v'", node.ID)
return err
}
if _, err = sq.Update("node").
Set("meta_data", node.RawMetaData).
Where("node.id = ?", node.ID).
RunWith(r.stmtCache).Exec(); err != nil {
cclog.Warnf("Error while updating metadata for node, DB ID '%v'", node.ID)
return err
}
r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour)
return nil
}
func (r *NodeRepository) GetNode(id int64, withMeta bool) (*schema.Node, error) {
func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) {
node := &schema.Node{}
// if err := sq.Select("id", "hostname", "cluster", "subcluster", "node_state",
// "health_state").From("node").
// Where("node.id = ?", id).RunWith(r.DB).
// QueryRow().Scan(&node.ID, &node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState,
// &node.HealthState); err != nil {
// cclog.Warnf("Error while querying node '%v' from database", id)
// return nil, err
// }
//
// if withMeta {
// var err error
// var meta map[string]string
// if meta, err = r.FetchMetadata(node); err != nil {
// cclog.Warnf("Error while fetching metadata for node '%v'", id)
// return nil, err
// }
// node.MetaData = meta
// }
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
"node_state.health_state", "MAX(node_state.time_stamp)").From("node_state").
Join("node ON nodes_state.node_id = node.id").GroupBy("node_state.node_id").
Where("node.hostname = ?", hostname).Where("node.cluster = ?", cluster).RunWith(r.DB).
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
cclog.Warnf("Error while querying node '%s' from database: %v", hostname, err)
return nil, err
}
if withMeta {
var err error
var meta map[string]string
if meta, err = r.FetchMetadata(hostname, cluster); err != nil {
cclog.Warnf("Error while fetching metadata for node '%s'", hostname)
return nil, err
}
node.MetaData = meta
}
return node, nil
}
@@ -151,7 +151,7 @@ INSERT INTO node (hostname, cluster, subcluster)
// AddNode adds a Node to the node table. This can be triggered by a node collector registration or
// from a nodestate update from the job scheduler.
func (r *NodeRepository) AddNode(node *schema.Node) (int64, error) {
func (r *NodeRepository) AddNode(node *schema.NodeDB) (int64, error) {
var err error
res, err := r.DB.NamedExec(NamedNodeInsert, node)
@@ -168,30 +168,15 @@ func (r *NodeRepository) AddNode(node *schema.Node) (int64, error) {
return node.ID, nil
}
func (r *NodeRepository) InsertNodeState(nodeState *schema.Node) error {
subcluster, err := archive.GetSubClusterByNode(nodeState.Cluster, nodeState.Hostname)
if err != nil {
cclog.Errorf("Error while getting subcluster for node '%s' in cluster '%s': %v", nodeState.Hostname, nodeState.Cluster, err)
return err
}
nodeState.SubCluster = subcluster
_, err = r.DB.NamedExec(NamedNodeInsert, nodeState)
if err != nil {
cclog.Errorf("Error while insert node '%v' to database", nodeState.Hostname)
return err
}
return nil
}
const NamedNodeStateInsert string = `
INSERT INTO node_state (time_stamp, node_state, health_state, cpus_allocated,
memory_allocated, gpus_allocated, jobs_running, node_id)
VALUES (:time_stamp, :node_state, :health_state, :cpus_allocated, :memory_allocated, :gpus_allocated, :jobs_running, :node_id);`
func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeState *schema.NodeState) error {
// TODO: Add real Monitoring Health State
// UpdateNodeState is called from the Node REST API to add a row in the node state table
func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeState *schema.NodeStateDB) error {
var id int64
if err := sq.Select("id").From("node").
@@ -203,7 +188,7 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
cclog.Errorf("Error while getting subcluster for node '%s' in cluster '%s': %v", hostname, cluster, err)
return err
}
node := schema.Node{
node := schema.NodeDB{
Hostname: hostname, Cluster: cluster, SubCluster: subcluster,
}
id, err = r.AddNode(&node)
@@ -220,6 +205,8 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
}
}
nodeState.NodeID = id
_, err := r.DB.NamedExec(NamedNodeStateInsert, nodeState)
if err != nil {
cclog.Errorf("Error while adding node state for '%v' to database", hostname)
@@ -254,9 +241,11 @@ func (r *NodeRepository) QueryNodes(
ctx context.Context,
filters []*model.NodeFilter,
order *model.OrderByInput, // Currently unused!
) ([]*model.Node, error) {
) ([]*schema.Node, error) {
query, qerr := AccessCheck(ctx,
sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", "node_state.health_state", "MAX(node_state.time_stamp)").From("node_state").Join("node ON nodes_state.node_id = node.id").GroupBy("node_state.node_id"))
sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
"node_state.health_state", "MAX(node_state.time_stamp)").From("node_state").
Join("node ON nodes_state.node_id = node.id").GroupBy("node_state.node_id"))
if qerr != nil {
return nil, qerr
}
@@ -271,8 +260,8 @@ func (r *NodeRepository) QueryNodes(
if f.Subcluster != nil {
query = buildStringCondition("node.subcluster", f.Subcluster, query)
}
if f.NodeState != nil {
query = query.Where("node.node_state = ?", f.NodeState)
if f.SchedulerState != nil {
query = query.Where("node.node_state = ?", f.SchedulerState)
}
if f.HealthState != nil {
query = query.Where("node.health_state = ?", f.HealthState)
@@ -286,9 +275,9 @@ func (r *NodeRepository) QueryNodes(
return nil, err
}
nodes := make([]*model.Node, 0, 50)
nodes := make([]*schema.Node, 0, 50)
for rows.Next() {
node := model.Node{}
node := schema.Node{}
if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
&node.NodeState, &node.HealthState); err != nil {