@@ -37,6 +37,7 @@ test() ->
3737 ok = test_count_children (),
3838 ok = test_one_for_all (),
3939 ok = test_crash_limits (),
40+ ok = try_again_restart (),
4041 ok .
4142
4243test_basic_supervisor () ->
@@ -220,7 +221,51 @@ child_start({trap_exit, Parent}) ->
220221 ok -> ok
221222 end
222223 end ),
223- {ok , Pid }.
224+ {ok , Pid };
225+ child_start ({get_permission , Arbitrator , Parent }) ->
226+ Arbitrator ! {can_start , self ()},
227+ CanStart =
228+ receive
229+ {do_start , Start } -> Start
230+ after 2000 ->
231+ {timeout , arbitrator }
232+ end ,
233+ case CanStart of
234+ true ->
235+ Pid = spawn_link (fun () ->
236+ receive
237+ stop -> exit (normal )
238+ end
239+ end ),
240+ register (crashy , Pid ),
241+ Parent ! Pid ,
242+ {ok , Pid };
243+ false ->
244+ {error , start_denied };
245+ Error ->
246+ {error , Error }
247+ end .
248+
249+ arbitrator_start (Deny ) when is_integer (Deny ) ->
250+ receive
251+ {can_start , From } ->
252+ From ! {do_start , true }
253+ end ,
254+ arbitrator (Deny ).
255+
256+ arbitrator (Deny ) ->
257+ Allow =
258+ if
259+ Deny =< 0 -> true ;
260+ true -> false
261+ end ,
262+ receive
263+ {can_start , From } ->
264+ From ! {do_start , Allow },
265+ arbitrator (Deny - 1 );
266+ shutdown ->
267+ ok
268+ end .
224269
225270test_ping_pong (SupPid ) ->
226271 Pid1 = get_and_test_server (),
@@ -334,7 +379,39 @@ init({test_crash_limits, Intensity, Period, Parent}) ->
334379 modules => [ping_pong_server ]
335380 }
336381 ],
337- {ok , {#{strategy => one_for_one , intensity => Intensity , period => Period }, ChildSpec }}.
382+ {ok , {#{strategy => one_for_one , intensity => Intensity , period => Period }, ChildSpec }};
383+ init ({test_try_again , Arbitrator , Parent }) ->
384+ ChildSpec = [
385+ #{
386+ id => finicky_child ,
387+ start => {? MODULE , child_start , [{get_permission , Arbitrator , Parent }]},
388+ restart => permanent ,
389+ shutdown => brutal_kill ,
390+ type => worker ,
391+ modules => [? MODULE ]
392+ }
393+ ],
394+ {ok , {#{strategy => one_for_one , intensity => 5 , period => 10 }, ChildSpec }};
395+ init ({test_retry_one_for_all , Arbitrator , Parent }) ->
396+ ChildSpec = [
397+ #{
398+ id => ping ,
399+ start => {ping_pong_server , start_link , [Parent ]},
400+ restart => permanent ,
401+ shutdown => brutal_kill ,
402+ type => worker ,
403+ modules => [ping_pong_server ]
404+ },
405+ #{
406+ id => crashy_child ,
407+ start => {? MODULE , child_start , [{get_permission , Arbitrator , Parent }]},
408+ restart => permanent ,
409+ shutdown => brutal_kill ,
410+ type => worker ,
411+ modules => [? MODULE ]
412+ }
413+ ],
414+ {ok , {#{strategy => one_for_all , intensity => 5 , period => 10 }, ChildSpec }}.
338415
339416test_supervisor_order () ->
340417 {ok , SupPid } = supervisor :start_link (? MODULE , {test_supervisor_order , self ()}),
@@ -436,7 +513,7 @@ test_one_for_all() ->
436513 end
437514 end ,
438515
439- demonitor (MonRef ),
516+ demonitor (MonRef , [ flush ] ),
440517
441518 % Collect startup message from restarted transient ping_pong_server child
442519 _Restart_2 = get_and_test_server (),
@@ -521,3 +598,127 @@ get_ping_pong_pid() ->
521598 {ping_pong_server_ready , Pid } -> Pid
522599 after 2000 -> throw (timeout )
523600 end .
601+
602+ try_again_restart () ->
603+ process_flag (trap_exit , true ),
604+
605+ % % Intensity is 5, use the arbitrator to prevent the child from restarting
606+ % % 4 times. This should not exit the supervisor due to intensity.
607+ Arbitrator1 = erlang :spawn (fun () -> arbitrator_start (4 ) end ),
608+ {ok , SupPid1 } = supervisor :start_link (
609+ {local , try_again_test1 }, ? MODULE , {test_try_again , Arbitrator1 , self ()}
610+ ),
611+ ChildPid = wait_child_pid (" ChildPid" ),
612+
613+ ChildPid ! stop ,
614+ ChildPid1 = wait_child_pid (" ChildPid1" ),
615+
616+ ChildPid1 ! stop ,
617+ Arbitrator1 ! shutdown ,
618+ exit (SupPid1 , normal ),
619+ ok =
620+ receive
621+ {'EXIT' , SupPid1 , normal } ->
622+ ok
623+ after 2000 ->
624+ error ({supervisor_not_stopped , normal })
625+ end ,
626+
627+ % % Prevent 5 restart attempts allow on the 6th, this should cause the supervisor
628+ % % to shutdown on the 6th attempt, which happens before period expires and we are
629+ % % already at max restart intensity.
630+ Arbitrator2 = erlang :spawn (fun () -> arbitrator_start (5 ) end ),
631+ {ok , SupPid2 } = supervisor :start_link (
632+ {local , test_try_again2 }, ? MODULE , {test_try_again , Arbitrator2 , self ()}
633+ ),
634+ ChildPid2 = wait_child_pid (" ChildPid2" ),
635+
636+ ChildPid2 ! stop ,
637+ ok =
638+ receive
639+ {'EXIT' , SupPid2 , shutdown } ->
640+ ok
641+ after 2000 ->
642+ error ({supervisor_not_stopped , restart_try_again_exceeded })
643+ end ,
644+ Arbitrator2 ! shutdown ,
645+
646+ % % Test one_for_all
647+ % % child 2 uses arbitrator to deny 4 restart attempts, succeeding on the 5th.
648+ Arbitrator3 = erlang :spawn (fun () -> arbitrator_start (4 ) end ),
649+ {ok , SupPid3 } = supervisor :start_link (
650+ {local , try_again_test3 }, ? MODULE , {test_retry_one_for_all , Arbitrator3 , self ()}
651+ ),
652+
653+ {Ping1 , _Crashy1 } = wait_ping_server_and_child_pid (),
654+
655+ % % this will require 6 restarts (1 to restart ping + 4 denied attempts for
656+ % % crashy and succeed on the 5th)
657+ gen_server :call (Ping1 , {stop , normal }),
658+
659+ % % Crashy will restart without error since the deny count was reached after
660+ % % first time it was stopped
661+ {Ping2 , _Crashy2 } = wait_ping_server_and_child_pid (),
662+
663+ % % this will surely exceed the limit
664+ % crashy ! stop,
665+ ok = gen_server :call (Ping2 , {stop , normal }),
666+
667+ % % ping_pong_server has 2000ms timeout, we need to wait longer.
668+ ok =
669+ receive
670+ {'EXIT' , SupPid3 , shutdown } ->
671+ ok
672+ after 5000 ->
673+ error ({supervisor_not_stopped , one_for_all_restarts_exceeded })
674+ end ,
675+ Arbitrator3 ! shutdown ,
676+
677+ process_flag (trap_exit , false ),
678+ ok .
679+
680+ wait_child_pid (Name ) ->
681+ receive
682+ Pid when is_pid (Pid ) ->
683+ Pid ;
684+ {'EXIT' , _ , shutdown } ->
685+ error ({unexpected , supervisor_shutdown });
686+ {'EXIT' , _ , _ } ->
687+ wait_child_pid (Name )
688+ after 1000 ->
689+ error ({timeout , no_child_pid , Name })
690+ end .
691+
692+ % % In the case where we have one_for_all, process all `ping_pong_server_ready`
693+ % % messages until we get the crashy `pid()` message which means the crashy
694+ % % process eventually started. Last `ping_pong_server_ready` will be received.
695+ wait_ping_server_and_child_pid () ->
696+ wait_ping_server_and_child_pid0 (undefined , undefined ).
697+
698+ wait_ping_server_and_child_pid0 (PingPongPid , ChildPid ) ->
699+ Timeout =
700+ if
701+ is_pid (PingPongPid ) andalso is_pid (ChildPid ) -> 0 ;
702+ true -> 2000
703+ end ,
704+ receive
705+ {ping_pong_server_ready , NewPingPongPid } ->
706+ wait_ping_server_and_child_pid0 (NewPingPongPid , ChildPid );
707+ NewChildPid when is_pid (NewChildPid ) ->
708+ wait_ping_server_and_child_pid0 (PingPongPid , NewChildPid );
709+ {'EXIT' , _ , shutdown } ->
710+ error ({unexpected , supervisor_shutdown });
711+ {'EXIT' , _ , _ } ->
712+ wait_ping_server_and_child_pid0 (PingPongPid , ChildPid )
713+ after Timeout ->
714+ if
715+ is_pid (PingPongPid ) andalso is_pid (ChildPid ) ->
716+ {PingPongPid , ChildPid };
717+ is_pid (PingPongPid ) ->
718+ error ({timeout , no_child_pid , crashy });
719+ is_pid (ChildPid ) ->
720+ error ({timeout , no_child_pid , ping_pong_server });
721+ true ->
722+ error ({timeout , no_child_pid , either })
723+ end
724+ end .
0 commit comments