diff --git a/go.mod b/go.mod index b35eafe5..f9bf7e42 100644 --- a/go.mod +++ b/go.mod @@ -124,5 +124,3 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect sigs.k8s.io/yaml v1.6.0 // indirect ) - -replace github.com/ClusterCockpit/cc-lib/v2 => ../cc-lib diff --git a/go.sum b/go.sum index d5bbe045..509c659c 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,8 @@ github.com/99designs/gqlgen v0.17.85 h1:EkGx3U2FDcxQm8YDLQSpXIAVmpDyZ3IcBMOJi2nH github.com/99designs/gqlgen v0.17.85/go.mod h1:yvs8s0bkQlRfqg03YXr3eR4OQUowVhODT/tHzCXnbOU= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= +github.com/ClusterCockpit/cc-lib/v2 v2.4.0 h1:OnZlvqSatg7yCQ2NtSR7AddpUVSiuSMZ8scF1a7nfOk= +github.com/ClusterCockpit/cc-lib/v2 v2.4.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw= github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= diff --git a/internal/repository/node.go b/internal/repository/node.go index a746182b..08a694c6 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -254,7 +254,7 @@ func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode From("node_state"). Join("node ON node_state.node_id = node.id"). Where(sq.Lt{"node_state.time_stamp": cutoff}). - Where("node_state.id NOT IN (SELECT MAX(id) FROM node_state GROUP BY node_id)"). + Where("node_state.id NOT IN (SELECT ns2.id FROM node_state ns2 WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id))"). OrderBy("node_state.time_stamp ASC"). RunWith(r.DB).Query() if err != nil { @@ -278,10 +278,14 @@ func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode } // DeleteNodeStatesBefore removes node_state rows with time_stamp < cutoff, -// but always preserves the latest row per node_id. +// but always preserves the row with the latest timestamp per node_id. func (r *NodeRepository) DeleteNodeStatesBefore(cutoff int64) (int64, error) { res, err := r.DB.Exec( - `DELETE FROM node_state WHERE time_stamp < ? AND id NOT IN (SELECT MAX(id) FROM node_state GROUP BY node_id)`, + `DELETE FROM node_state WHERE time_stamp < ? + AND id NOT IN ( + SELECT id FROM node_state ns2 + WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id) + )`, cutoff, ) if err != nil { diff --git a/internal/repository/node_test.go b/internal/repository/node_test.go index 4286ab34..d1e86b9a 100644 --- a/internal/repository/node_test.go +++ b/internal/repository/node_test.go @@ -156,8 +156,12 @@ func nodeTestSetup(t *testing.T) { func TestUpdateNodeState(t *testing.T) { nodeTestSetup(t) + repo := GetNodeRepository() + now := time.Now().Unix() + nodeState := schema.NodeStateDB{ - TimeStamp: time.Now().Unix(), NodeState: "allocated", + TimeStamp: now, + NodeState: "allocated", CpusAllocated: 72, MemoryAllocated: 480, GpusAllocated: 0, @@ -165,18 +169,152 @@ func TestUpdateNodeState(t *testing.T) { JobsRunning: 1, } - repo := GetNodeRepository() err := repo.UpdateNodeState("host124", "testcluster", &nodeState) if err != nil { - return + t.Fatal(err) } node, err := repo.GetNode("host124", "testcluster", false) if err != nil { - return + t.Fatal(err) } if node.NodeState != "allocated" { t.Errorf("wrong node state\ngot: %s \nwant: allocated ", node.NodeState) } + + t.Run("FindBeforeEmpty", func(t *testing.T) { + // Only the current-timestamp row exists, so nothing should be found before now + rows, err := repo.FindNodeStatesBefore(now) + if err != nil { + t.Fatal(err) + } + if len(rows) != 0 { + t.Errorf("expected 0 rows, got %d", len(rows)) + } + }) + + t.Run("DeleteOldRows", func(t *testing.T) { + // Insert 2 more old rows for host124 + for i, ts := range []int64{now - 7200, now - 3600} { + ns := schema.NodeStateDB{ + TimeStamp: ts, + NodeState: "allocated", + HealthState: schema.MonitoringStateFull, + CpusAllocated: 72, + MemoryAllocated: 480, + JobsRunning: i, + } + if err := repo.UpdateNodeState("host124", "testcluster", &ns); err != nil { + t.Fatal(err) + } + } + + // Delete rows older than 30 minutes + cutoff := now - 1800 + cnt, err := repo.DeleteNodeStatesBefore(cutoff) + if err != nil { + t.Fatal(err) + } + + // Should delete the 2 old rows + if cnt != 2 { + t.Errorf("expected 2 deleted rows, got %d", cnt) + } + + // Latest row should still exist + node, err := repo.GetNode("host124", "testcluster", false) + if err != nil { + t.Fatal(err) + } + if node.NodeState != "allocated" { + t.Errorf("expected node state 'allocated', got %s", node.NodeState) + } + }) + + t.Run("PreservesLatestPerNode", func(t *testing.T) { + // Insert a single old row for host125 — it's the latest per node so it must survive + ns := schema.NodeStateDB{ + TimeStamp: now - 7200, + NodeState: "idle", + HealthState: schema.MonitoringStateFull, + CpusAllocated: 0, + MemoryAllocated: 0, + JobsRunning: 0, + } + if err := repo.UpdateNodeState("host125", "testcluster", &ns); err != nil { + t.Fatal(err) + } + + // Delete everything older than now — the latest per node should be preserved + _, err := repo.DeleteNodeStatesBefore(now) + if err != nil { + t.Fatal(err) + } + + // The latest row for host125 must still exist + node, err := repo.GetNode("host125", "testcluster", false) + if err != nil { + t.Fatal(err) + } + if node.NodeState != "idle" { + t.Errorf("expected node state 'idle', got %s", node.NodeState) + } + + // Verify exactly 1 row remains for host125 + var countAfter int + if err := repo.DB.QueryRow( + "SELECT COUNT(*) FROM node_state WHERE node_id = (SELECT id FROM node WHERE hostname = 'host125')"). + Scan(&countAfter); err != nil { + t.Fatal(err) + } + if countAfter != 1 { + t.Errorf("expected 1 row remaining for host125, got %d", countAfter) + } + }) + + t.Run("FindBeforeWithJoin", func(t *testing.T) { + // Insert old and current rows for host123 + for _, ts := range []int64{now - 7200, now} { + ns := schema.NodeStateDB{ + TimeStamp: ts, + NodeState: "allocated", + HealthState: schema.MonitoringStateFull, + CpusAllocated: 8, + MemoryAllocated: 1024, + GpusAllocated: 1, + JobsRunning: 1, + } + if err := repo.UpdateNodeState("host123", "testcluster", &ns); err != nil { + t.Fatal(err) + } + } + + // Find rows older than 30 minutes, excluding latest per node + cutoff := now - 1800 + rows, err := repo.FindNodeStatesBefore(cutoff) + if err != nil { + t.Fatal(err) + } + + // Should find the old host123 row + found := false + for _, row := range rows { + if row.Hostname == "host123" && row.TimeStamp == now-7200 { + found = true + if row.Cluster != "testcluster" { + t.Errorf("expected cluster 'testcluster', got %s", row.Cluster) + } + if row.SubCluster != "sc1" { + t.Errorf("expected subcluster 'sc1', got %s", row.SubCluster) + } + if row.CpusAllocated != 8 { + t.Errorf("expected cpus_allocated 8, got %d", row.CpusAllocated) + } + } + } + if !found { + t.Errorf("expected to find old host123 row among %d results", len(rows)) + } + }) }