@@ -37,6 +37,7 @@ test() ->
3737 ok = test_count_children (),
3838 ok = test_one_for_all (),
3939 ok = test_crash_limits (),
40+ ok = try_again_restart (),
4041 ok .
4142
4243test_basic_supervisor () ->
@@ -220,7 +221,50 @@ child_start({trap_exit, Parent}) ->
220221 ok -> ok
221222 end
222223 end ),
223- {ok , Pid }.
224+ {ok , Pid };
225+ child_start ({get_permission , Arbitrator , Parent }) ->
226+ Arbitrator ! {can_start , self ()},
227+ CanStart =
228+ receive
229+ {do_start , Start } -> Start
230+ after 2000 ->
231+ {timeout , arbitrator }
232+ end ,
233+ case CanStart of
234+ true ->
235+ Pid = spawn_link (fun () ->
236+ receive
237+ stop -> exit (normal )
238+ end
239+ end ),
240+ Parent ! Pid ,
241+ {ok , Pid };
242+ false ->
243+ {error , start_denied };
244+ Error ->
245+ {error , Error }
246+ end .
247+
%% Entry point of an arbitrator process.
%% The very first `{can_start, From}` request is always granted, so the
%% initial start of the child succeeds. Control then passes to arbitrator/1
%% with the given deny budget.
%% Deny: integer number of subsequent requests that arbitrator/1 will refuse.
arbitrator_start(Deny) when is_integer(Deny) ->
    receive
        {can_start, Requester} ->
            Requester ! {do_start, true}
    end,
    arbitrator(Deny).
254+
%% Arbitrator loop: answers every `{can_start, From}` request with
%% `{do_start, Verdict}` and decrements the deny budget on each request.
%% Requests are refused while Deny is greater than zero and granted once it
%% has dropped to zero or below. A `shutdown` message ends the loop with `ok`.
arbitrator(Deny) ->
    %% The budget is exhausted as soon as it reaches zero (or goes negative).
    Verdict = Deny =< 0,
    receive
        shutdown ->
            ok;
        {can_start, Requester} ->
            Requester ! {do_start, Verdict},
            arbitrator(Deny - 1)
    end.
224268
225269test_ping_pong (SupPid ) ->
226270 Pid1 = get_and_test_server (),
@@ -334,7 +378,39 @@ init({test_crash_limits, Intensity, Period, Parent}) ->
334378 modules => [ping_pong_server ]
335379 }
336380 ],
337- {ok , {#{strategy => one_for_one , intensity => Intensity , period => Period }, ChildSpec }}.
381+ {ok , {#{strategy => one_for_one , intensity => Intensity , period => Period }, ChildSpec }};
382+ init ({test_try_again , Arbitrator , Parent }) ->
383+ ChildSpec = [
384+ #{
385+ id => finicky_child ,
386+ start => {? MODULE , child_start , [{get_permission , Arbitrator , Parent }]},
387+ restart => permanent ,
388+ shutdown => brutal_kill ,
389+ type => worker ,
390+ modules => [? MODULE ]
391+ }
392+ ],
393+ {ok , {#{strategy => one_for_one , intensity => 5 , period => 10 }, ChildSpec }};
394+ init ({test_retry_one_for_all , Arbitrator , Parent }) ->
395+ ChildSpec = [
396+ #{
397+ id => ping ,
398+ start => {ping_pong_server , start_link , [Parent ]},
399+ restart => permanent ,
400+ shutdown => 2000 ,
401+ type => worker ,
402+ modules => [? MODULE ]
403+ },
404+ #{
405+ id => crashy_child ,
406+ start => {? MODULE , child_start , [{get_permission , Arbitrator , Parent }]},
407+ restart => permanent ,
408+ shutdown => brutal_kill ,
409+ type => worker ,
410+ modules => [? MODULE ]
411+ }
412+ ],
413+ {ok , {#{strategy => one_for_all , intensity => 5 , period => 10 }, ChildSpec }}.
338414
339415test_supervisor_order () ->
340416 {ok , SupPid } = supervisor :start_link (? MODULE , {test_supervisor_order , self ()}),
@@ -521,3 +597,86 @@ get_ping_pong_pid() ->
521597 {ping_pong_server_ready , Pid } -> Pid
522598 after 2000 -> throw (timeout )
523599 end .
600+
%% Checks how denied child restarts count against the supervisor's restart
%% intensity (5 restarts within a period of 10 in the child specs used here).
%% Three scenarios run in sequence, each with its own arbitrator process that
%% decides whether `child_start({get_permission, ...})` is allowed to succeed:
%%   1. one_for_one, 4 denied restarts: the supervisor must stay alive.
%%   2. one_for_one, 5 denied restarts: the supervisor must exit with
%%      `shutdown`.
%%   3. one_for_all, 4 denied restarts followed by one more child exit: the
%%      supervisor must exit with `shutdown`.
%% Returns `ok`. Raises `error({supervisor_not_stopped, _})` when an expected
%% supervisor exit is not observed, or `error({timeout, no_child_pid})` (from
%% wait_child_pid/0) when a child pid is not reported in time.
try_again_restart() ->
    %% Supervisors are linked to this process; trap exits so their termination
    %% arrives as a message. Keep the previous setting so it can be restored.
    OldTrapExit = process_flag(trap_exit, true),

    %% Intensity is 5. Use the arbitrator to deny 4 restart attempts of the
    %% child. This must not make the supervisor exit due to restart intensity.
    Arbitrator1 = erlang:spawn(fun() -> arbitrator_start(4) end),
    {ok, SupPid1} = supervisor:start_link(
        {local, try_again_test1}, ?MODULE, {test_try_again, Arbitrator1, self()}
    ),
    ChildPid = wait_child_pid(),

    ChildPid ! stop,
    ChildPid1 = wait_child_pid(),

    ChildPid1 ! stop,
    Arbitrator1 ! shutdown,
    exit(SupPid1, normal),
    ok =
        receive
            {'EXIT', SupPid1, normal} ->
                ok
        after 2000 ->
            error({supervisor_not_stopped, normal})
        end,

    %% Stopping ChildPid1 may have triggered one more (granted) restart before
    %% the supervisor went down, in which case a child pid is still queued
    %% here. Drop it so the next scenario does not mistake it for its own
    %% child. Nothing is queued in the common case, hence the zero timeout.
    receive
        StalePid when is_pid(StalePid) -> ok
    after 0 ->
        ok
    end,

    %% Deny 5 restart attempts and allow the 6th. The 6th attempt happens
    %% before the period expires, when the maximum restart intensity has
    %% already been reached, so the supervisor must shut down.
    Arbitrator2 = erlang:spawn(fun() -> arbitrator_start(5) end),
    {ok, SupPid2} = supervisor:start_link(
        {local, test_try_again2}, ?MODULE, {test_try_again, Arbitrator2, self()}
    ),
    ChildPid2 = wait_child_pid(),

    ChildPid2 ! stop,
    ok =
        receive
            {'EXIT', SupPid2, shutdown} ->
                ok
        after 2000 ->
            error({supervisor_not_stopped, restart_try_again_exceeded})
        end,
    Arbitrator2 ! shutdown,

    %% one_for_all: the second child uses the arbitrator to deny 4 restart
    %% attempts after Ping1 crashes. One further child exit afterwards must
    %% cause the supervisor to shut down.
    Arbitrator3 = erlang:spawn(fun() -> arbitrator_start(4) end),
    {ok, SupPid3} = supervisor:start_link(
        {local, try_again_test3}, ?MODULE, {test_retry_one_for_all, Arbitrator3, self()}
    ),

    Ping1 = get_and_test_server(),

    _Crashy1 = wait_child_pid(),

    gen_server:cast(Ping1, {crash, test}),

    _Ping2 = get_and_test_server(),
    Crashy2 = wait_child_pid(),
    %% This exits the child, triggering another one_for_all restart.
    Crashy2 ! stop,

    %% ping_pong_server has a 2000 ms timeout, so we need to wait longer.
    ok =
        receive
            {'EXIT', SupPid3, shutdown} ->
                ok
        after 5000 ->
            error({supervisor_not_stopped, one_for_all_restarts_exceeded})
        end,
    Arbitrator3 ! shutdown,

    %% Restore whatever trap_exit setting the caller had.
    process_flag(trap_exit, OldTrapExit),
    ok.
675+
%% Waits up to one second for a bare pid message and returns that pid.
%% Children started via `child_start({get_permission, ...})` report
%% themselves to the test process this way.
%% Raises `error({timeout, no_child_pid})` if no pid arrives in time.
wait_child_pid() ->
    receive
        Child when is_pid(Child) -> Child
    after 1000 ->
        error({timeout, no_child_pid})
    end.
0 commit comments