mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-21 07:17:30 +01:00
Compare commits
16 Commits
feature/52
...
v1.5.2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
97330ce598 | ||
| fb176c5afb | |||
|
|
d4ee937115 | ||
| 999d93efc3 | |||
|
|
4ce0cfb686 | ||
| 359962d166 | |||
| 60554896d5 | |||
|
|
a9f335d910 | ||
| bf48389aeb | |||
|
|
676025adfe | ||
|
|
d4a0ae173f | ||
|
|
a7e5ecaf6c | ||
|
|
965e2007fb | ||
|
|
6a29faf460 | ||
|
|
8751ae023d | ||
|
|
128c098865 |
@@ -5,6 +5,7 @@ before:
|
||||
builds:
|
||||
- env:
|
||||
- CGO_ENABLED=1
|
||||
- CC=x86_64-linux-musl-gcc
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
|
||||
@@ -10,7 +10,10 @@ If you are upgrading from v1.5.0 you need to do another DB migration. This
|
||||
should not take long. For optimal database performance after the migration it is
|
||||
recommended to apply the new `optimize-db` flag, which runs the sqlite `ANALYZE`
|
||||
and `VACUUM` commands. Depending on your database size (more than 40GB) the
|
||||
`VACUUM` may take up to 2h.
|
||||
`VACUUM` may take up to 2h. You can also run the `ANALYZE` command manually.
|
||||
While we are confident that the memory issue with the metricstore cleanup move
|
||||
policy is fixed, it is still recommended to use the delete policy for cleanup.
|
||||
This is also the default.
|
||||
|
||||
## Changes in 1.5.2
|
||||
|
||||
@@ -19,6 +22,14 @@ and `VACUUM` commands. Depending on your database size (more then 40GB) the
|
||||
- **Memory spike in parquet writer**: Fixed memory spikes when using the
|
||||
metricstore move (archive) policy with the parquet writer. The writer now
|
||||
processes data in a streaming fashion to avoid accumulating large allocations.
|
||||
- **Top list query fixes**: Fixed top list queries in analysis and dashboard
|
||||
views.
|
||||
- **Exclude down nodes from HealthCheck**: Down nodes are now excluded from
|
||||
health checks in both the REST and NATS handlers.
|
||||
- **Node state priority order**: Node state determination now enforces a
|
||||
priority order. Exception: idle+down results in idle.
|
||||
- **Blocking ReceiveNats call**: Fixed a blocking NATS receive call in the
|
||||
metricstore.
|
||||
|
||||
### Database performance
|
||||
|
||||
@@ -33,6 +44,16 @@ and `VACUUM` commands. Depending on your database size (more then 40GB) the
|
||||
write load.
|
||||
- **Increased default SQLite timeout**: The default SQLite connection timeout
|
||||
has been raised to reduce spurious timeout errors under load.
|
||||
- **Optimized stats queries**: Improved sortby handling in stats queries, fixed
|
||||
cache key passing, and simplified a stats query condition that caused an
|
||||
expensive unnecessary subquery.
|
||||
|
||||
### MetricStore performance
|
||||
|
||||
- **Sharded WAL consumer**: The WAL consumer is now sharded for significantly
|
||||
higher write throughput.
|
||||
- **NATS contention fix**: Fixed contention in the metricstore NATS ingestion
|
||||
path.
|
||||
|
||||
### NATS API
|
||||
|
||||
@@ -52,6 +73,24 @@ and `VACUUM` commands. Depending on your database size (more then 40GB) the
|
||||
operation.
|
||||
- **Checkpoint archiving log**: Added an informational log message when the
|
||||
metricstore checkpoint archiving process runs.
|
||||
- **Auth failure context**: Auth failure log messages now include more context
|
||||
information.
|
||||
|
||||
### Behavior changes
|
||||
|
||||
- **DB-based metricHealth**: Replaced heuristic-based metric health with
|
||||
DB-based metric health for the node view, providing more accurate health
|
||||
status information.
|
||||
- **Removed minRunningFor filter remnants**: Cleaned up remaining `minRunningFor`
|
||||
references from the GraphQL schema and query builder.
|
||||
|
||||
### Frontend
|
||||
|
||||
- **Streamlined statsSeries**: Unified stats series calculation and rendering
|
||||
across plot components.
|
||||
- **Clarified plot titles**: Improved titles in dashboard and health views.
|
||||
- **Bumped frontend dependencies**: Updated frontend dependencies to latest
|
||||
versions.
|
||||
|
||||
### Dependencies
|
||||
|
||||
@@ -67,7 +106,7 @@ and `VACUUM` commands. Depending on your database size (more then 40GB) the
|
||||
running has to be allowed to execute the journalctl command.
|
||||
- The user configuration keys for the ui have changed. Therefore old user
|
||||
configuration persisted in the database is not used anymore. It is recommended
|
||||
to configure the metrics shown in the ui-config sestion and remove all records
|
||||
to configure the metrics shown in the ui-config section and remove all records
|
||||
in the table after the update.
|
||||
- Currently energy footprint metrics of type energy are ignored for calculating
|
||||
total energy.
|
||||
|
||||
4
go.sum
4
go.sum
@@ -4,10 +4,6 @@ github.com/99designs/gqlgen v0.17.88 h1:neMQDgehMwT1vYIOx/w5ZYPUU/iMNAJzRO44I5In
|
||||
github.com/99designs/gqlgen v0.17.88/go.mod h1:qeqYFEgOeSKqWedOjogPizimp2iu4E23bdPvl4jTYic=
|
||||
github.com/Azure/go-ntlmssp v0.1.0 h1:DjFo6YtWzNqNvQdrwEyr/e4nhU3vRiwenz5QX7sFz+A=
|
||||
github.com/Azure/go-ntlmssp v0.1.0/go.mod h1:NYqdhxd/8aAct/s4qSYZEerdPuH1liG2/X9DiVTbhpk=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.8.2 h1:rCLZk8wz8yq8xBnBEdVKigvA2ngR8dPmHbEFwxxb3jw=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.8.2/go.mod h1:FwD8vnTIbBM3ngeLNKmCvp9FoSjQZm7xnuaVxEKR23o=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.9.0 h1:mzUYakcjwb+UP5II4jOvr36rSYct90gXBbtUg+nvm9c=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.9.0/go.mod h1:FwD8vnTIbBM3ngeLNKmCvp9FoSjQZm7xnuaVxEKR23o=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.9.1 h1:eplKhXQyGAElBGCEGdmxwj7fLv26Op16uK0KxUePDak=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.9.1/go.mod h1:FwD8vnTIbBM3ngeLNKmCvp9FoSjQZm7xnuaVxEKR23o=
|
||||
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q=
|
||||
|
||||
@@ -676,6 +676,11 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
// Use request-scoped cache: multiple aliases with same (filter, groupBy)
|
||||
// but different sortBy/page hit the DB only once.
|
||||
if cache := getStatsGroupCache(ctx); cache != nil {
|
||||
// Ensure the sort field is computed even if not in the GraphQL selection,
|
||||
// because sortAndPageStats will sort by it in memory.
|
||||
if sortBy != nil {
|
||||
reqFields[sortByFieldName(*sortBy)] = true
|
||||
}
|
||||
key := statsCacheKey(filter, groupBy, reqFields)
|
||||
var allStats []*model.JobsStatistics
|
||||
allStats, err = cache.getOrCompute(key, func() ([]*model.JobsStatistics, error) {
|
||||
|
||||
@@ -107,6 +107,33 @@ func sortAndPageStats(allStats []*model.JobsStatistics, sortBy *model.SortByAggr
|
||||
return sorted
|
||||
}
|
||||
|
||||
// sortByFieldName maps a SortByAggregate enum to the corresponding reqFields key.
|
||||
// This ensures the DB computes the column that sortAndPageStats will sort by.
|
||||
func sortByFieldName(sortBy model.SortByAggregate) string {
|
||||
switch sortBy {
|
||||
case model.SortByAggregateTotaljobs:
|
||||
return "totalJobs"
|
||||
case model.SortByAggregateTotalusers:
|
||||
return "totalUsers"
|
||||
case model.SortByAggregateTotalwalltime:
|
||||
return "totalWalltime"
|
||||
case model.SortByAggregateTotalnodes:
|
||||
return "totalNodes"
|
||||
case model.SortByAggregateTotalnodehours:
|
||||
return "totalNodeHours"
|
||||
case model.SortByAggregateTotalcores:
|
||||
return "totalCores"
|
||||
case model.SortByAggregateTotalcorehours:
|
||||
return "totalCoreHours"
|
||||
case model.SortByAggregateTotalaccs:
|
||||
return "totalAccs"
|
||||
case model.SortByAggregateTotalacchours:
|
||||
return "totalAccHours"
|
||||
default:
|
||||
return "totalJobs"
|
||||
}
|
||||
}
|
||||
|
||||
// statsFieldGetter returns a function that extracts the sortable int field
|
||||
// from a JobsStatistics struct for the given sort key.
|
||||
func statsFieldGetter(sortBy model.SortByAggregate) func(*model.JobsStatistics) int {
|
||||
|
||||
Reference in New Issue
Block a user