Optimized CCMS healthcheck

This commit is contained in:
Aditya Ujeniya
2026-02-04 10:24:45 +01:00
parent 42ce598865
commit 39b8356683
5 changed files with 52 additions and 290 deletions

View File

@@ -135,45 +135,3 @@ func debugMetrics(rw http.ResponseWriter, r *http.Request) {
return
}
}
// metricsHealth godoc
// @summary HealthCheck endpoint
// @tags healthcheck
// @description This endpoint allows the users to check if a node is healthy
// @produce json
// @param cluster query string true "Cluster name"
// @param subcluster query string false "Subcluster name"
// @param node query string true "Node name"
// @success 200 {string} string "JSON-encoded health check result"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /healthcheck/ [get]
//
// metricsHealth reads the "cluster", "subcluster" and "node" query parameters,
// asks the memory store for the health of the selected node and writes the
// result as JSON. "cluster" and "node" are mandatory; a missing value or a
// failing health check is reported through handleError with status 400. A
// failure to encode the result is reported with status 500 instead of sending
// an empty body.
func metricsHealth(rw http.ResponseWriter, r *http.Request) {
	// Parse the query string once; r.URL.Query() re-parses on every call.
	query := r.URL.Query()
	rawCluster := query.Get("cluster")
	rawSubCluster := query.Get("subcluster")
	rawNode := query.Get("node")

	if rawCluster == "" || rawNode == "" {
		handleError(errors.New("'cluster' and 'node' are required query parameter"), http.StatusBadRequest, rw)
		return
	}

	selector := []string{rawCluster, rawNode}

	ms := metricstore.GetMemoryStore()
	response, err := ms.HealthCheck(selector, rawSubCluster)
	if err != nil {
		handleError(err, http.StatusBadRequest, rw)
		return
	}

	jsonData, err := json.Marshal(response)
	if err != nil {
		// Do not fall through: writing a nil payload would yield an empty
		// 200 response that clients cannot distinguish from success.
		cclog.Errorf("Error marshaling HealthCheckResponse JSON: %s", err)
		handleError(err, http.StatusInternalServerError, rw)
		return
	}

	// Only the success path sets the content type here; the error paths
	// leave the response entirely to handleError.
	rw.Header().Set("Content-Type", "application/json")
	if _, err := rw.Write(jsonData); err != nil {
		cclog.Errorf("Error writing HealthCheckResponse: %s", err)
	}
}

View File

@@ -91,7 +91,7 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
if sc != "" {
metricList := archive.GetMetricConfigSubCluster(req.Cluster, sc)
metricNames := metricListToNames(metricList)
if states, err := ms.HealthCheckAlt(req.Cluster, nl, metricNames); err == nil {
if states, err := ms.HealthCheck(req.Cluster, nl, metricNames); err == nil {
maps.Copy(healthStates, states)
}
}

View File

@@ -81,7 +81,7 @@ func (api *RestAPI) MountAPIRoutes(r *mux.Router) {
// Cluster List
r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet)
// Slurm node state
r.HandleFunc("/nodestate/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/nodestates/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut)
// Job Handler
if config.Keys.APISubjects == nil {
cclog.Info("Enabling REST start/stop job API")
@@ -127,12 +127,12 @@ func (api *RestAPI) MountMetricStoreAPIRoutes(r *mux.Router) {
r.HandleFunc("/free", freeMetrics).Methods(http.MethodPost)
r.HandleFunc("/write", writeMetrics).Methods(http.MethodPost)
r.HandleFunc("/debug", debugMetrics).Methods(http.MethodGet)
r.HandleFunc("/healthcheck", metricsHealth).Methods(http.MethodGet)
r.HandleFunc("/healthcheck", api.updateNodeStates).Methods(http.MethodPost)
// Same endpoints but with trailing slash
r.HandleFunc("/free/", freeMetrics).Methods(http.MethodPost)
r.HandleFunc("/write/", writeMetrics).Methods(http.MethodPost)
r.HandleFunc("/debug/", debugMetrics).Methods(http.MethodGet)
r.HandleFunc("/healthcheck/", metricsHealth).Methods(http.MethodGet)
r.HandleFunc("/healthcheck/", api.updateNodeStates).Methods(http.MethodPost)
}
// MountConfigAPIRoutes registers configuration and user management endpoints.