mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-11 21:41:46 +01:00
Optimized CCMS healthcheck
This commit is contained in:
@@ -135,45 +135,3 @@ func debugMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// handleHealthCheck godoc
|
||||
// @summary HealthCheck endpoint
|
||||
// @tags healthcheck
|
||||
// @description This endpoint allows the users to check if a node is healthy
|
||||
// @produce json
|
||||
// @param selector query string false "Selector"
|
||||
// @success 200 {string} string "Debug dump"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /healthcheck/ [get]
|
||||
func metricsHealth(rw http.ResponseWriter, r *http.Request) {
|
||||
rawCluster := r.URL.Query().Get("cluster")
|
||||
rawSubCluster := r.URL.Query().Get("subcluster")
|
||||
rawNode := r.URL.Query().Get("node")
|
||||
|
||||
if rawCluster == "" || rawNode == "" {
|
||||
handleError(errors.New("'cluster' and 'node' are required query parameter"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
|
||||
selector := []string{rawCluster, rawNode}
|
||||
|
||||
ms := metricstore.GetMemoryStore()
|
||||
response, err := ms.HealthCheck(selector, rawSubCluster)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
jsonData, err := json.Marshal(response)
|
||||
if err != nil {
|
||||
cclog.Errorf("Error marshaling HealthCheckResponse JSON: %s", err)
|
||||
}
|
||||
|
||||
rw.Write(jsonData)
|
||||
}
|
||||
|
||||
@@ -91,7 +91,7 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
||||
if sc != "" {
|
||||
metricList := archive.GetMetricConfigSubCluster(req.Cluster, sc)
|
||||
metricNames := metricListToNames(metricList)
|
||||
if states, err := ms.HealthCheckAlt(req.Cluster, nl, metricNames); err == nil {
|
||||
if states, err := ms.HealthCheck(req.Cluster, nl, metricNames); err == nil {
|
||||
maps.Copy(healthStates, states)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,7 +81,7 @@ func (api *RestAPI) MountAPIRoutes(r *mux.Router) {
|
||||
// Cluster List
|
||||
r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet)
|
||||
// Slurm node state
|
||||
r.HandleFunc("/nodestate/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/nodestates/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut)
|
||||
// Job Handler
|
||||
if config.Keys.APISubjects == nil {
|
||||
cclog.Info("Enabling REST start/stop job API")
|
||||
@@ -127,12 +127,12 @@ func (api *RestAPI) MountMetricStoreAPIRoutes(r *mux.Router) {
|
||||
r.HandleFunc("/free", freeMetrics).Methods(http.MethodPost)
|
||||
r.HandleFunc("/write", writeMetrics).Methods(http.MethodPost)
|
||||
r.HandleFunc("/debug", debugMetrics).Methods(http.MethodGet)
|
||||
r.HandleFunc("/healthcheck", metricsHealth).Methods(http.MethodGet)
|
||||
r.HandleFunc("/healthcheck", api.updateNodeStates).Methods(http.MethodPost)
|
||||
// Same endpoints but with trailing slash
|
||||
r.HandleFunc("/free/", freeMetrics).Methods(http.MethodPost)
|
||||
r.HandleFunc("/write/", writeMetrics).Methods(http.MethodPost)
|
||||
r.HandleFunc("/debug/", debugMetrics).Methods(http.MethodGet)
|
||||
r.HandleFunc("/healthcheck/", metricsHealth).Methods(http.MethodGet)
|
||||
r.HandleFunc("/healthcheck/", api.updateNodeStates).Methods(http.MethodPost)
|
||||
}
|
||||
|
||||
// MountConfigAPIRoutes registers configuration and user management endpoints.
|
||||
|
||||
Reference in New Issue
Block a user