Skip to content

Commit bffd562

Browse files
Merge pull request #1848 from rabbitmq/await_startup_with_a_timeout
Introduce a function that awaits startup with a timeout
2 parents dd947c6 + b7065ea commit bffd562

File tree

1 file changed

+78
-18
lines changed

1 file changed

+78
-18
lines changed

src/rabbit.erl

Lines changed: 78 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
-behaviour(application).
2424

2525
-export([start/0, boot/0, stop/0,
26-
stop_and_halt/0, await_startup/0, await_startup/1,
26+
stop_and_halt/0, await_startup/0, await_startup/1, await_startup/3,
2727
status/0, is_running/0, alarms/0,
2828
is_running/1, environment/0, rotate_logs/0,
2929
start_fhc/0]).
@@ -236,6 +236,13 @@
236236

237237
-define(ASYNC_THREADS_WARNING_THRESHOLD, 8).
238238

239+
%% 1 minute
240+
-define(BOOT_START_TIMEOUT, 1 * 60 * 1000).
241+
%% 12 hours
242+
-define(BOOT_FINISH_TIMEOUT, 12 * 60 * 60 * 1000).
243+
%% 100 ms
244+
-define(BOOT_STATUS_CHECK_INTERVAL, 100).
245+
239246
%%----------------------------------------------------------------------------
240247

241248
-type restart_type() :: 'permanent' | 'transient' | 'temporary'.
@@ -248,7 +255,7 @@
248255
-spec boot() -> 'ok'.
249256
-spec stop() -> 'ok'.
250257
-spec stop_and_halt() -> no_return().
251-
-spec await_startup() -> 'ok'.
258+
252259
-spec status
253260
() -> [{pid, integer()} |
254261
{running_applications, [{atom(), string(), string()}]} |
@@ -682,41 +689,83 @@ handle_app_error(Term) ->
682689
throw({Term, App, Reason})
683690
end.
684691

692+
%% Convenience wrapper: runs the is_booting/1 check against the local node.
is_booting() ->
    LocalNode = node(),
    is_booting(LocalNode).
693+
694+
%% Asks Node (via rpc) whether a process is registered under the name
%% rabbit_boot.
%%
%% Returns:
%%   true          - a pid is registered as rabbit_boot on Node
%%   false         - nothing is registered under that name
%%   {badrpc, _}   - the rpc itself failed; passed through unchanged so
%%                   callers can tell "not booting" from "unreachable"
is_booting(Node) ->
    Registered = rpc:call(Node, erlang, whereis, [rabbit_boot]),
    case Registered of
        undefined               -> false;
        Pid when is_pid(Pid)    -> true;
        {badrpc, _} = Failure   -> Failure
    end.
700+
701+
702+
%% Waits for the local node to finish starting, without printing progress
%% reports. Delegates to await_startup/2.
-spec await_startup() -> 'ok' | {'error', 'timeout'}.
await_startup() ->
    LocalNode = node(),
    await_startup(LocalNode, false).
687705

688-
await_startup(Node) ->
706+
%% Single-argument entry point, dispatched on the argument's type:
%%   - an atom is treated as the name of the node to wait for
%%     (default timeouts, no progress reports);
%%   - an integer is treated as a timeout in milliseconds for the
%%     local node (no progress reports).
%% Any other argument fails with function_clause.
-spec await_startup(node() | non_neg_integer()) -> 'ok' | {'error', 'timeout'}.
await_startup(Node) when is_atom(Node) ->
    await_startup(Node, false);
await_startup(Timeout) when is_integer(Timeout) ->
    LocalNode = node(),
    await_startup(LocalNode, false, Timeout).
711+
712+
%% Waits for Node to finish starting, using the default timeouts
%% (?BOOT_START_TIMEOUT for boot to begin, ?BOOT_FINISH_TIMEOUT for it to
%% complete).
%%
%% Node                 - node to wait for
%% PrintProgressReports - when true, periodic "still booting" messages are
%%                        printed while waiting for boot to finish
%%
%% Returns ok if the node is already running, otherwise whatever
%% wait_for_boot_to_finish/2 returns. If the initial is_booting/1 probe
%% fails at the rpc level, the {badrpc, Reason} tuple is returned instead of
%% crashing with case_clause, matching how the wait loops report the same
%% failure.
-spec await_startup(node(), boolean()) ->
          'ok' | {'error', 'timeout'} | {'badrpc', term()}.
await_startup(Node, PrintProgressReports) ->
    case is_booting(Node) of
        true ->
            wait_for_boot_to_finish(Node, PrintProgressReports);
        false ->
            case is_running(Node) of
                true ->
                    ok;
                false ->
                    %% NOTE(review): the result of wait_for_boot_to_start/1
                    %% is discarded, so a start timeout or badrpc only
                    %% surfaces through what wait_for_boot_to_finish/2
                    %% reports afterwards - confirm this is intended.
                    wait_for_boot_to_start(Node),
                    wait_for_boot_to_finish(Node, PrintProgressReports)
            end;
        {badrpc, _} = Err ->
            %% Node could not be queried at all; report it to the caller.
            Err
    end.
698723

699-
is_booting() -> is_booting(node()).
700-
701-
is_booting(Node) ->
702-
case rpc:call(Node, erlang, whereis, [rabbit_boot]) of
703-
{badrpc, _} = Err -> Err;
704-
undefined -> false;
705-
P when is_pid(P) -> true
724+
%% Waits for Node to finish starting, with a caller-supplied timeout.
%%
%% Node                 - node to wait for
%% PrintProgressReports - when true, periodic "still booting" messages are
%%                        printed while waiting for boot to finish
%% Timeout              - milliseconds; the same value is handed to both the
%%                        "wait for boot to start" and the "wait for boot to
%%                        finish" phases, so the two phases each get their
%%                        own budget of Timeout
%%
%% Returns ok if the node is already running, otherwise whatever
%% wait_for_boot_to_finish/3 returns. If the initial is_booting/1 probe
%% fails at the rpc level, the {badrpc, Reason} tuple is returned instead of
%% crashing with case_clause, matching how the wait loops report the same
%% failure.
-spec await_startup(node(), boolean(), non_neg_integer()) ->
          'ok' | {'error', 'timeout'} | {'badrpc', term()}.
await_startup(Node, PrintProgressReports, Timeout) ->
    case is_booting(Node) of
        true ->
            wait_for_boot_to_finish(Node, PrintProgressReports, Timeout);
        false ->
            case is_running(Node) of
                true ->
                    ok;
                false ->
                    %% NOTE(review): the result of wait_for_boot_to_start/2
                    %% is discarded, so a start timeout or badrpc only
                    %% surfaces through what wait_for_boot_to_finish/3
                    %% reports afterwards - confirm this is intended.
                    wait_for_boot_to_start(Node, Timeout),
                    wait_for_boot_to_finish(Node, PrintProgressReports, Timeout)
            end;
        {badrpc, _} = Err ->
            %% Node could not be queried at all; report it to the caller.
            Err
    end.
707735

708736
%% Waits for boot to begin on Node, allowing the default
%% ?BOOT_START_TIMEOUT (1 minute) before giving up.
wait_for_boot_to_start(Node) ->
    DefaultTimeout = ?BOOT_START_TIMEOUT,
    wait_for_boot_to_start(Node, DefaultTimeout).
738+
739+
%% Waits up to Timeout milliseconds for boot to begin on Node.
%% The timeout is converted into a number of polling rounds, one round per
%% ?BOOT_STATUS_CHECK_INTERVAL milliseconds (integer division, so any
%% remainder is dropped).
wait_for_boot_to_start(Node, Timeout) ->
    do_wait_for_boot_to_start(Node, Timeout div ?BOOT_STATUS_CHECK_INTERVAL).
742+
743+
%% Polling loop behind wait_for_boot_to_start/2.
%%
%% Each round probes is_booting/1 on Node:
%%   true          - boot has begun; returns ok
%%   {badrpc, _}   - the probe failed; the tuple is returned as is
%%   false         - sleeps ?BOOT_STATUS_CHECK_INTERVAL ms and retries with
%%                   one fewer round left
%% Once no rounds are left, returns {error, timeout}.
do_wait_for_boot_to_start(_Node, IterationsLeft) when IterationsLeft =< 0 ->
    {error, timeout};
do_wait_for_boot_to_start(Node, IterationsLeft) ->
    BootState = is_booting(Node),
    case BootState of
        true ->
            ok;
        {badrpc, _} = Failure ->
            Failure;
        false ->
            timer:sleep(?BOOT_STATUS_CHECK_INTERVAL),
            do_wait_for_boot_to_start(Node, IterationsLeft - 1)
    end.
718755

719756
%% Waits for boot to finish on Node without progress reports, allowing the
%% default ?BOOT_FINISH_TIMEOUT (12 hours).
wait_for_boot_to_finish(Node) ->
    DefaultTimeout = ?BOOT_FINISH_TIMEOUT,
    wait_for_boot_to_finish(Node, false, DefaultTimeout).
758+
759+
%% Waits for boot to finish on Node, allowing the default
%% ?BOOT_FINISH_TIMEOUT (12 hours). PrintProgressReports is forwarded
%% unchanged to wait_for_boot_to_finish/3.
wait_for_boot_to_finish(Node, PrintProgressReports) ->
    DefaultTimeout = ?BOOT_FINISH_TIMEOUT,
    wait_for_boot_to_finish(Node, PrintProgressReports, DefaultTimeout).
761+
762+
%% Waits up to Timeout milliseconds for boot to finish on Node.
%% The timeout is converted into a number of polling rounds, one round per
%% ?BOOT_STATUS_CHECK_INTERVAL milliseconds (integer division, so any
%% remainder is dropped), and handed to the polling loop.
wait_for_boot_to_finish(Node, PrintProgressReports, Timeout) ->
    do_wait_for_boot_to_finish(Node,
                               PrintProgressReports,
                               Timeout div ?BOOT_STATUS_CHECK_INTERVAL).
765+
766+
do_wait_for_boot_to_finish(_Node, _PrintProgressReports, IterationsLeft) when IterationsLeft =< 0 ->
767+
{error, timeout};
768+
do_wait_for_boot_to_finish(Node, PrintProgressReports, IterationsLeft) ->
720769
case is_booting(Node) of
721770
false ->
722771
%% We don't want badrpc error to be interpreted as false,
@@ -729,10 +778,21 @@ wait_for_boot_to_finish(Node) ->
729778
{badrpc, _} = Err ->
730779
Err;
731780
true ->
732-
timer:sleep(100),
733-
wait_for_boot_to_finish(Node)
781+
maybe_print_boot_progress(PrintProgressReports, IterationsLeft),
782+
timer:sleep(?BOOT_STATUS_CHECK_INTERVAL),
783+
do_wait_for_boot_to_finish(Node, PrintProgressReports, IterationsLeft - 1)
734784
end.
735785

786+
%% Optionally prints a "still booting" line while waiting for boot to finish.
%%
%% With progress reports disabled (first argument false) this is a no-op.
%% With them enabled, a line is printed only on rounds where IterationsLeft
%% is a multiple of 100; every other round is silent. The message is meant
%% to show up on the CLI side as a sign of progress.
%% Always returns ok.
maybe_print_boot_progress(false, _IterationsLeft) ->
    ok;
maybe_print_boot_progress(true, IterationsLeft) ->
    Remainder = IterationsLeft rem 100,
    if
        Remainder =:= 0 ->
            io:format("Still booting, will check again in 10 seconds...~n");
        true ->
            ok
    end.
795+
736796
status() ->
737797
S1 = [{pid, list_to_integer(os:getpid())},
738798
%% The timeout value used is twice that of gen_server:call/2.

0 commit comments

Comments
 (0)