@@ -37,6 +37,7 @@ test() ->
3737 ok = test_count_children (),
3838 ok = test_one_for_all (),
3939 ok = test_crash_limits (),
40+ ok = try_again_restart (),
4041 ok .
4142
4243test_basic_supervisor () ->
@@ -220,7 +221,51 @@ child_start({trap_exit, Parent}) ->
220221 ok -> ok
221222 end
222223 end ),
223- {ok , Pid }.
224+ {ok , Pid };
225+ child_start ({get_permission , Arbitrator , Parent }) ->
226+ Arbitrator ! {can_start , self ()},
227+ CanStart =
228+ receive
229+ {do_start , Start } -> Start
230+ after 2000 ->
231+ {timeout , arbitrator }
232+ end ,
233+ case CanStart of
234+ true ->
235+ Pid = spawn_link (fun () ->
236+ receive
237+ stop -> exit (normal )
238+ end
239+ end ),
240+ register (crashy , Pid ),
241+ Parent ! Pid ,
242+ {ok , Pid };
243+ false ->
244+ {error , start_denied };
245+ Error ->
246+ {error , Error }
247+ end .
248+
249+ arbitrator_start (Deny ) when is_integer (Deny ) ->
250+ receive
251+ {can_start , From } ->
252+ From ! {do_start , true }
253+ end ,
254+ arbitrator (Deny ).
255+
256+ arbitrator (Deny ) ->
257+ Allow =
258+ if
259+ Deny =< 0 -> true ;
260+ true -> false
261+ end ,
262+ receive
263+ {can_start , From } ->
264+ From ! {do_start , Allow },
265+ arbitrator (Deny - 1 );
266+ shutdown ->
267+ ok
268+ end .
224269
225270test_ping_pong (SupPid ) ->
226271 Pid1 = get_and_test_server (),
@@ -334,7 +379,39 @@ init({test_crash_limits, Intensity, Period, Parent}) ->
334379 modules => [ping_pong_server ]
335380 }
336381 ],
337- {ok , {#{strategy => one_for_one , intensity => Intensity , period => Period }, ChildSpec }}.
382+ {ok , {#{strategy => one_for_one , intensity => Intensity , period => Period }, ChildSpec }};
383+ init ({test_try_again , Arbitrator , Parent }) ->
384+ ChildSpec = [
385+ #{
386+ id => finicky_child ,
387+ start => {? MODULE , child_start , [{get_permission , Arbitrator , Parent }]},
388+ restart => permanent ,
389+ shutdown => brutal_kill ,
390+ type => worker ,
391+ modules => [? MODULE ]
392+ }
393+ ],
394+ {ok , {#{strategy => one_for_one , intensity => 5 , period => 10 }, ChildSpec }};
395+ init ({test_retry_one_for_all , Arbitrator , Parent }) ->
396+ ChildSpec = [
397+ #{
398+ id => ping ,
399+ start => {ping_pong_server , start_link , [Parent ]},
400+ restart => permanent ,
401+ shutdown => brutal_kill ,
402+ type => worker ,
403+ modules => [ping_pong_server ]
404+ },
405+ #{
406+ id => crashy_child ,
407+ start => {? MODULE , child_start , [{get_permission , Arbitrator , Parent }]},
408+ restart => permanent ,
409+ shutdown => brutal_kill ,
410+ type => worker ,
411+ modules => [? MODULE ]
412+ }
413+ ],
414+ {ok , {#{strategy => one_for_all , intensity => 5 , period => 10 }, ChildSpec }}.
338415
339416test_supervisor_order () ->
340417 {ok , SupPid } = supervisor :start_link (? MODULE , {test_supervisor_order , self ()}),
@@ -521,3 +598,110 @@ get_ping_pong_pid() ->
521598 {ping_pong_server_ready , Pid } -> Pid
522599 after 2000 -> throw (timeout )
523600 end .
601+
602+ try_again_restart () ->
603+ process_flag (trap_exit , true ),
604+
605+ % % Intensity is 5, use the arbitrator to prevent the child from restarting
606+ % % 4 times. This should not exit the supervisor due to intensity.
607+ Arbitrator1 = erlang :spawn (fun () -> arbitrator_start (4 ) end ),
608+ {ok , SupPid1 } = supervisor :start_link (
609+ {local , try_again_test1 }, ? MODULE , {test_try_again , Arbitrator1 , self ()}
610+ ),
611+ ChildPid = wait_child_pid (" ChildPid" ),
612+
613+ ChildPid ! stop ,
614+ ChildPid1 = wait_child_pid (" ChildPid1" ),
615+
616+ ChildPid1 ! stop ,
617+ Arbitrator1 ! shutdown ,
618+ exit (SupPid1 , normal ),
619+ ok =
620+ receive
621+ {'EXIT' , SupPid1 , normal } ->
622+ ok
623+ after 2000 ->
624+ error ({supervisor_not_stopped , normal })
625+ end ,
626+
627+ % % Prevent 5 restart attempts allow on the 6th, this should cause the supervisor
628+ % % to shutdown on the 6th attempt, which happens before period expires and we are
629+ % % already at max restart intensity.
630+ Arbitrator2 = erlang :spawn (fun () -> arbitrator_start (5 ) end ),
631+ {ok , SupPid2 } = supervisor :start_link (
632+ {local , test_try_again2 }, ? MODULE , {test_try_again , Arbitrator2 , self ()}
633+ ),
634+ ChildPid2 = wait_child_pid (" ChildPid2" ),
635+
636+ ChildPid2 ! stop ,
637+ ok =
638+ receive
639+ {'EXIT' , SupPid2 , shutdown } ->
640+ ok
641+ after 2000 ->
642+ error ({supervisor_not_stopped , restart_try_again_exceeded })
643+ end ,
644+ Arbitrator2 ! shutdown ,
645+
646+ % % Test one_for_all
647+ % % child 2 uses arbitrator to deny 3 restart attempts, succeeding on the 4th.
648+ Arbitrator3 = erlang :spawn (fun () -> arbitrator_start (4 ) end ),
649+ {ok , SupPid3 } = supervisor :start_link (
650+ {local , try_again_test3 }, ? MODULE , {test_retry_one_for_all , Arbitrator3 , self ()}
651+ ),
652+
653+ Ping1 = wait_ping_server (),
654+ _Crashy1 = wait_child_pid (" Crashy1" ),
655+
656+ % % this will take 5 restarts (1 to restart ping + 3 denied attempts for
657+ % % crashy and succeed on the 4th)
658+ gen_server :call (Ping1 , {stop , normal }),
659+
660+ Ping2 = wait_ping_server (),
661+ % % Crashy will restart without error since the deny count was reached after
662+ % % first time it was stopped
663+ _Crashy2 = wait_child_pid (" Crashy2" ),
664+
665+ % % this will surely exceed the limit
666+ % crashy ! stop,
667+ gen_server :call (Ping2 , {stop , normal }),
668+
669+ % % ping_pong_server has 2000ms timeout, we need to wait longer.
670+ ok =
671+ receive
672+ {'EXIT' , SupPid3 , shutdown } ->
673+ ok
674+ after 5000 ->
675+ error ({supervisor_not_stopped , one_for_all_restarts_exceeded })
676+ end ,
677+ Arbitrator3 ! shutdown ,
678+
679+ process_flag (trap_exit , false ),
680+ ok .
681+
682+ wait_ping_server () ->
683+ receive
684+ {ping_pong_server_ready , Pid } ->
685+ Pid ;
686+ {'EXIT' , _ , shutdown } ->
687+ error ({unexpected , supervisor_shutdown });
688+ {'EXIT' , _ , _ } = Exit ->
689+ % % TODO: remove before final commit, just for debugging what might have
690+ % % happend to the the ping_pong gen_server on OTP... \o/
691+ io :format (" ~n >>> TEST wait_ping_server/0 caught exit: ~w~n " , [Exit ]),
692+ wait_ping_server ()
693+ after 2000 ->
694+ error ({timeout , no_child_pid , ping_pong_server })
695+ end .
696+
697+ wait_child_pid (Name ) ->
698+ receive
699+ Pid when is_pid (Pid ) ->
700+ Pid ;
701+ {'EXIT' , _ , shutdown } ->
702+ error ({unexpected , supervisor_shutdown });
703+ {'EXIT' , _ , _ } ->
704+ wait_child_pid (Name )
705+ after 1000 ->
706+ error ({timeout , no_child_pid , Name })
707+ end .
0 commit comments