diff --git a/deps/rabbitmq_management/priv/www/api/index.html b/deps/rabbitmq_management/priv/www/api/index.html index 54015e0fc91e..ad7cb4f1765d 100644 --- a/deps/rabbitmq_management/priv/www/api/index.html +++ b/deps/rabbitmq_management/priv/www/api/index.html @@ -1252,6 +1252,41 @@

Reference

Service Unavailable. + + X + + + + /api/health/checks/below-node-connection-limit + + Responds a 200 OK if the target node has fewer connections to the AMQP + and AMQPS ports than the configured maximum, otherwise responds with a + 503 Service Unavailable. + + + + X + + + + /api/health/checks/ready-to-serve-clients + +

+ Responds a 200 OK if the target node is ready to serve clients, otherwise + responds with a 503 Service Unavailable. This check combines: +

+
    +
  1. /api/health/checks/is-in-service
  2. +
  3. /api/health/checks/protocol-listener/amqp or /api/health/checks/protocol-listener/amqps
  4. +
  5. /api/health/checks/below-node-connection-limit
  6. +
+

+ So this check will only return 200 OK if the target node is in service, + an AMQP or AMQPS listener is available and the target node has fewer active + AMQP and AMQPS connections than its configured limit. +

+ + X diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl b/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl index ece7c1372666..9f939558563a 100644 --- a/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl +++ b/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl @@ -208,6 +208,8 @@ dispatcher() -> {"/health/checks/quorum-queues-without-elected-leaders/vhost/:vhost/pattern/:pattern", rabbit_mgmt_wm_health_check_quorum_queues_without_elected_leaders, []}, {"/health/checks/node-is-quorum-critical", rabbit_mgmt_wm_health_check_node_is_quorum_critical, []}, {"/health/checks/is-in-service", rabbit_mgmt_wm_health_check_is_in_service, []}, + {"/health/checks/below-node-connection-limit", rabbit_mgmt_wm_health_check_below_node_connection_limit, []}, + {"/health/checks/ready-to-serve-clients", rabbit_mgmt_wm_health_check_ready_to_serve_clients, []}, {"/reset", rabbit_mgmt_wm_reset, []}, {"/reset/:node", rabbit_mgmt_wm_reset, []}, {"/rebalance/queues", rabbit_mgmt_wm_rebalance_queues, [{queues, all}]}, diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_below_node_connection_limit.erl b/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_below_node_connection_limit.erl new file mode 100644 index 000000000000..df2cf1882c22 --- /dev/null +++ b/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_below_node_connection_limit.erl @@ -0,0 +1,63 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved. +%% + +-module(rabbit_mgmt_wm_health_check_below_node_connection_limit). + +-export([init/2]). +-export([to_json/2, content_types_provided/2]). +-export([variances/2]). + +-include("rabbit_mgmt.hrl"). 
+-include_lib("rabbitmq_management_agent/include/rabbit_mgmt_records.hrl"). + +init(Req, _State) -> + Req1 = rabbit_mgmt_headers:set_no_cache_headers( + rabbit_mgmt_headers:set_common_permission_headers( + Req, ?MODULE), ?MODULE), + {cowboy_rest, Req1, #context{}}. + +variances(Req, Context) -> + {[<<"accept-encoding">>, <<"origin">>], Req, Context}. + +content_types_provided(ReqData, Context) -> + {rabbit_mgmt_util:responder_map(to_json), ReqData, Context}. + +to_json(ReqData, Context) -> + ActiveConns = lists:foldl( + fun(Protocol, Acc) -> + Acc + protocol_connection_count(Protocol) + end, 0, [amqp, 'amqp/ssl']), + Limit = rabbit_misc:get_env(rabbit, connection_max, infinity), + case ActiveConns < Limit of + true -> + rabbit_mgmt_util:reply( + #{status => ok, + limit => Limit, + connections => ActiveConns}, ReqData, Context); + false -> + Body = #{ + status => failed, + reason => <<"node connection limit is reached">>, + limit => Limit, + connections => ActiveConns + }, + {Response, ReqData1, Context1} = rabbit_mgmt_util:reply( + Body, ReqData, Context), + {stop, + cowboy_req:reply( + ?HEALTH_CHECK_FAILURE_STATUS, #{}, Response, ReqData1), + Context1} + end. + +protocol_connection_count(Protocol) -> + case rabbit_networking:ranch_ref_of_protocol(Protocol) of + undefined -> + 0; + RanchRef -> + #{active_connections := Count} = ranch:info(RanchRef), + Count + end. diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_ready_to_serve_clients.erl b/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_ready_to_serve_clients.erl new file mode 100644 index 000000000000..762bb2d1e692 --- /dev/null +++ b/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_ready_to_serve_clients.erl @@ -0,0 +1,81 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2025 Broadcom. 
All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved. +%% + +%% A composite health check that combines: +%% * GET /api/health/checks/is-in-service +%% * GET /api/health/checks/protocol-listener/amqp +%% * GET /api/health/checks/below-node-connection-limit + +-module(rabbit_mgmt_wm_health_check_ready_to_serve_clients). + +-export([init/2]). +-export([to_json/2, content_types_provided/2]). +-export([variances/2]). + +-include("rabbit_mgmt.hrl"). +-include_lib("rabbitmq_management_agent/include/rabbit_mgmt_records.hrl"). + +init(Req, _State) -> + Req1 = rabbit_mgmt_headers:set_no_cache_headers( + rabbit_mgmt_headers:set_common_permission_headers( + Req, ?MODULE), ?MODULE), + {cowboy_rest, Req1, #context{}}. + +variances(Req, Context) -> + {[<<"accept-encoding">>, <<"origin">>], Req, Context}. + +content_types_provided(ReqData, Context) -> + {rabbit_mgmt_util:responder_map(to_json), ReqData, Context}. + +to_json(ReqData, Context) -> + case check() of + {ok, Body} -> + rabbit_mgmt_util:reply(Body, ReqData, Context); + {error, Body} -> + {Response, ReqData1, Context1} = rabbit_mgmt_util:reply( + Body, ReqData, Context), + {stop, + cowboy_req:reply( + ?HEALTH_CHECK_FAILURE_STATUS, #{}, Response, ReqData1), + Context1} + end. 
+ +check() -> + case rabbit:is_serving() of + true -> + RanchRefs0 = [ + rabbit_networking:ranch_ref_of_protocol(amqp), + rabbit_networking:ranch_ref_of_protocol('amqp/ssl') + ], + RanchRefs = [R || R <- RanchRefs0, R =/= undefined], + case RanchRefs of + [_ | _] -> + ActiveConns = lists:foldl( + fun(RanchRef, Acc) -> + #{active_connections := Count} = ranch:info(RanchRef), + Acc + Count + end, 0, RanchRefs), + Limit = rabbit_misc:get_env(rabbit, connection_max, infinity), + case ActiveConns < Limit of + true -> + {ok, #{status => ok, + limit => Limit, + connections => ActiveConns}}; + false -> + {error, #{status => failed, + reason => <<"node connection limit is reached">>, + limit => Limit, + connections => ActiveConns}} + end; + [] -> + {error, #{status => failed, + reason => <<"no active listeners for AMQP/AMQPS">>}} + end; + false -> + {error, #{status => failed, + reason => <<"the rabbit node is not currently available to serve">>}} + end. diff --git a/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl b/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl index 975e6f6ee409..b3304d3d9b99 100644 --- a/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl +++ b/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl @@ -51,7 +51,9 @@ all_tests() -> [ protocol_listener_test, port_listener_test, certificate_expiration_test, - is_in_service_test + is_in_service_test, + below_node_connection_limit_test, + ready_to_serve_clients_test ]. %% ------------------------------------------------------------------- @@ -470,8 +472,66 @@ is_in_service_test(Config) -> passed. 
+below_node_connection_limit_test(Config) -> + Path = "/health/checks/below-node-connection-limit", + Check0 = http_get(Config, Path, ?OK), + ?assertEqual(<<"ok">>, maps:get(status, Check0)), + ?assertEqual(0, maps:get(connections, Check0)), + ?assertEqual(<<"infinity">>, maps:get(limit, Check0)), + + %% Set the connection limit low and open 'limit' connections. + Limit = 10, + rabbit_ct_broker_helpers:rpc( + Config, 0, application, set_env, [rabbit, connection_max, Limit]), + Connections = [rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0) || _ <- lists:seq(1, Limit)], + true = lists:all(fun(E) -> is_pid(E) end, Connections), + {error, not_allowed} = rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0), + + Body0 = http_get_failed(Config, Path), + ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body0)), + ?assertEqual(10, maps:get(<<"limit">>, Body0)), + ?assertEqual(10, maps:get(<<"connections">>, Body0)), + + %% Clean up the connections and reset the limit. + [catch rabbit_ct_client_helpers:close_connection(C) || C <- Connections], + rabbit_ct_broker_helpers:rpc( + Config, 0, application, set_env, [rabbit, connection_max, infinity]), + + passed. + +ready_to_serve_clients_test(Config) -> + Path = "/health/checks/ready-to-serve-clients", + Check0 = http_get(Config, Path, ?OK), + ?assertEqual(<<"ok">>, maps:get(status, Check0)), + + true = rabbit_ct_broker_helpers:mark_as_being_drained(Config, 0), + Body0 = http_get_failed(Config, Path), + ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body0)), + true = rabbit_ct_broker_helpers:unmark_as_being_drained(Config, 0), + + %% Set the connection limit low and open 'limit' connections. 
+ Limit = 10, + rabbit_ct_broker_helpers:rpc( + Config, 0, application, set_env, [rabbit, connection_max, Limit]), + Connections = [rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0) || _ <- lists:seq(1, Limit)], + true = lists:all(fun(E) -> is_pid(E) end, Connections), + {error, not_allowed} = rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0), + + Body1 = http_get_failed(Config, Path), + ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body1)), + ?assertEqual(10, maps:get(<<"limit">>, Body1)), + ?assertEqual(10, maps:get(<<"connections">>, Body1)), + + %% Clean up the connections and reset the limit. + [catch rabbit_ct_client_helpers:close_connection(C) || C <- Connections], + rabbit_ct_broker_helpers:rpc( + Config, 0, application, set_env, [rabbit, connection_max, infinity]), + + passed. + http_get_failed(Config, Path) -> {ok, {{_, Code, _}, _, ResBody}} = req(Config, get, Path, [auth_header("guest", "guest")]), + ct:pal("GET ~s: ~w ~w", [Path, Code, ResBody]), ?assertEqual(Code, ?HEALTH_CHECK_FAILURE_STATUS), rabbit_json:decode(rabbit_data_coercion:to_binary(ResBody)).