mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-25 17:17:29 +01:00
Merge pull request #534 from ClusterCockpit/hotfix
feat: Add command line switch to trigger manual metricstore checkpoint cleanup
This commit is contained in:
@@ -11,7 +11,8 @@ import "flag"
|
||||
|
||||
var (
|
||||
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB,
|
||||
flagForceDB, flagDev, flagVersion, flagLogDateTime, flagApplyTags, flagOptimizeDB bool
|
||||
flagForceDB, flagDev, flagVersion, flagLogDateTime, flagApplyTags, flagOptimizeDB,
|
||||
flagCleanupCheckpoints bool
|
||||
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||
)
|
||||
|
||||
@@ -28,6 +29,7 @@ func cliInit() {
|
||||
flag.BoolVar(&flagApplyTags, "apply-tags", false, "Run taggers on all completed jobs and exit")
|
||||
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
||||
flag.BoolVar(&flagOptimizeDB, "optimize-db", false, "Optimize database: run VACUUM to reclaim space, then ANALYZE to update query planner statistics")
|
||||
flag.BoolVar(&flagCleanupCheckpoints, "cleanup-checkpoints", false, "Clean up old checkpoint files (delete or archive) based on retention settings, then exit")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: <username>:[admin,support,manager,api,user]:<password>")
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
goruntime "runtime"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -536,6 +537,43 @@ func run() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Handle checkpoint cleanup
|
||||
if flagCleanupCheckpoints {
|
||||
mscfg := ccconf.GetPackageConfig("metric-store")
|
||||
if mscfg == nil {
|
||||
return fmt.Errorf("metric-store configuration required for checkpoint cleanup")
|
||||
}
|
||||
if err := json.Unmarshal(mscfg, &metricstore.Keys); err != nil {
|
||||
return fmt.Errorf("decoding metric-store config: %w", err)
|
||||
}
|
||||
if metricstore.Keys.NumWorkers <= 0 {
|
||||
metricstore.Keys.NumWorkers = min(goruntime.NumCPU()/2+1, metricstore.DefaultMaxWorkers)
|
||||
}
|
||||
|
||||
d, err := time.ParseDuration(metricstore.Keys.RetentionInMemory)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parsing retention-in-memory: %w", err)
|
||||
}
|
||||
from := time.Now().Add(-d)
|
||||
deleteMode := metricstore.Keys.Cleanup == nil || metricstore.Keys.Cleanup.Mode != "archive"
|
||||
cleanupDir := ""
|
||||
if !deleteMode {
|
||||
cleanupDir = metricstore.Keys.Cleanup.RootDir
|
||||
}
|
||||
|
||||
cclog.Infof("Cleaning up checkpoints older than %s...", from.Format(time.RFC3339))
|
||||
n, err := metricstore.CleanupCheckpoints(
|
||||
metricstore.Keys.Checkpoints.RootDir, cleanupDir, from.Unix(), deleteMode)
|
||||
if err != nil {
|
||||
return fmt.Errorf("checkpoint cleanup: %w", err)
|
||||
}
|
||||
if deleteMode {
|
||||
cclog.Exitf("Cleanup done: %d checkpoint files deleted.", n)
|
||||
} else {
|
||||
cclog.Exitf("Cleanup done: %d checkpoint files archived to parquet.", n)
|
||||
}
|
||||
}
|
||||
|
||||
// Exit if start server is not requested
|
||||
if !flagServer {
|
||||
cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
|
||||
|
||||
@@ -54,11 +54,16 @@
|
||||
const paging = { itemsPerPage: 50, page: 1 };
|
||||
const sorting = { field: "startTime", type: "col", order: "DESC" };
|
||||
const nodeMetricsQuery = gql`
|
||||
query ($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
|
||||
query (
|
||||
$cluster: String!,
|
||||
$nodes: [String!],
|
||||
$from: Time!,
|
||||
$to: Time!,
|
||||
$nodeFilter: [NodeFilter!]!,
|
||||
$sorting: OrderByInput!
|
||||
) {
|
||||
nodeMetrics(cluster: $cluster, nodes: $nodes, from: $from, to: $to) {
|
||||
host
|
||||
nodeState
|
||||
metricHealth
|
||||
subCluster
|
||||
metrics {
|
||||
name
|
||||
@@ -79,7 +84,14 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
nodeStatus: nodes(filter: $nodeFilter, order: $sorting) {
|
||||
count
|
||||
items {
|
||||
schedulerState
|
||||
healthState
|
||||
}
|
||||
}
|
||||
}
|
||||
`;
|
||||
const nodeJobsQuery = gql`
|
||||
@@ -146,6 +158,8 @@
|
||||
nodes: [hostname],
|
||||
from: from?.toISOString(),
|
||||
to: to?.toISOString(),
|
||||
nodeFilter: { hostname: { eq: hostname }},
|
||||
sorting // $sorting unused in backend: Use placeholder
|
||||
},
|
||||
})
|
||||
);
|
||||
@@ -157,8 +171,8 @@
|
||||
})
|
||||
);
|
||||
|
||||
const thisNodeState = $derived($nodeMetricsData?.data?.nodeMetrics[0]?.nodeState || 'notindb');
|
||||
const thisMetricHealth = $derived($nodeMetricsData?.data?.nodeMetrics[0]?.metricHealth || 'unknown');
|
||||
const thisNodeState = $derived($nodeMetricsData?.data?.nodeStatus?.items[0]?.schedulerState || 'notindb');
|
||||
const thisMetricHealth = $derived($nodeMetricsData?.data?.nodeStatus?.items[0]?.healthState || 'unknown');
|
||||
</script>
|
||||
|
||||
<Row cols={{ xs: 2, lg: 3}}>
|
||||
|
||||
Reference in New Issue
Block a user