|
92 | 92 | modules = [] :: [module()] | dynamic |
93 | 93 | }). |
94 | 94 | %% note: the list of children should always be kept in order, with first to start at the head. |
95 | | --record(state, {restart_strategy = one_for_one :: strategy(), children = [] :: [#child{}]}). |
%% Supervisor process state.
-record(state, {
    %% Restart strategy taken from the supervisor spec returned by Mod:init/1.
    restart_strategy = one_for_one :: strategy(),
    %% Maximum number of restarts tolerated within 'period'; one restart more
    %% than this makes add_restart/1 report shutdown. 0 disallows any restart.
    intensity = 1 :: non_neg_integer(),
    %% Length of the restart window, in seconds (multiplied by 1000 before
    %% being compared with millisecond timestamps).
    period = 5 :: pos_integer(),
    %% Number of entries currently kept in 'restarts'.
    restart_count = 0 :: non_neg_integer(),
    %% erlang:monotonic_time(millisecond) of each recorded restart; new
    %% entries are appended, so the oldest restart is at the head.
    restarts = [] :: [integer()],
    children = [] :: [#child{}]
}).
| 103 | + |
%% Number of recorded restarts at which restarts older than 'period' are
%% purged from the restart history. This matters when the 'intensity' value
%% is large: without the purge, stale restarts would keep consuming RAM,
%% which is a concern on MCUs with limited memory. In the future a function
%% could be used to set a sane default for the platform (OTP uses 1000).
-define(STALE_RESTART_LIMIT, 100).
96 | 110 |
|
%% Start a supervisor process linked to the caller. The callback module and
%% its arguments are handed to this module's init/1 as a {Module, Args} pair.
start_link(Module, Args) ->
    InitArg = {Module, Args},
    gen_server:start_link(?MODULE, InitArg, []).
@@ -121,12 +135,23 @@ count_children(Supervisor) -> |
121 | 135 | init({Mod, Args}) -> |
122 | 136 | erlang:process_flag(trap_exit, true), |
123 | 137 | case Mod:init(Args) of |
124 | | - {ok, {{Strategy, _Intensity, _Period}, StartSpec}} -> |
125 | | - State = init_state(StartSpec, #state{restart_strategy = Strategy}), |
| 138 | + {ok, {{Strategy, Intensity, Period}, StartSpec}} -> |
| 139 | + State = init_state(StartSpec, #state{ |
| 140 | + restart_strategy = Strategy, |
| 141 | + intensity = Intensity, |
| 142 | + period = Period |
| 143 | + }), |
126 | 144 | NewChildren = start_children(State#state.children, []), |
127 | 145 | {ok, State#state{children = NewChildren}}; |
128 | | - {ok, {#{strategy := Strategy}, StartSpec}} -> |
129 | | - State = init_state(StartSpec, #state{restart_strategy = Strategy}), |
| 146 | + {ok, {#{} = SupSpec, StartSpec}} -> |
| 147 | + Strategy = maps:get(strategy, SupSpec, one_for_one), |
| 148 | + Intensity = maps:get(intensity, SupSpec, 3), |
| 149 | + Period = maps:get(period, SupSpec, 5), |
| 150 | + State = init_state(StartSpec, #state{ |
| 151 | + restart_strategy = Strategy, |
| 152 | + intensity = Intensity, |
| 153 | + period = Period |
| 154 | + }), |
130 | 155 | NewChildren = start_children(State#state.children, []), |
131 | 156 | {ok, State#state{children = NewChildren}}; |
132 | 157 | Error -> |
@@ -323,7 +348,15 @@ handle_child_exit(Pid, Reason, State) -> |
323 | 348 | #child{} = Child -> |
324 | 349 | case should_restart(Reason, Child#child.restart) of |
325 | 350 | true -> |
326 | | - handle_restart_strategy(Child, State); |
| 351 | + case add_restart(State) of |
| 352 | + {ok, State1} -> |
| 353 | + handle_restart_strategy(Child, State1); |
| 354 | + {shutdown, State1} -> |
| 355 | + RemainingChildren = lists:keydelete( |
| 356 | + Pid, #child.pid, State1#state.children |
| 357 | + ), |
| 358 | + {shutdown, State1#state{children = RemainingChildren}} |
| 359 | + end; |
327 | 360 | false -> |
328 | 361 | Children = lists:keydelete(Pid, #child.pid, State#state.children), |
329 | 362 | {ok, State#state{children = Children}} |
@@ -367,6 +400,8 @@ should_restart(Reason, transient) -> |
367 | 400 |
|
368 | 401 | loop_terminate([#child{pid = undefined} | Tail], AccRemaining) -> |
369 | 402 | loop_terminate(Tail, AccRemaining); |
| 403 | +loop_terminate([#child{pid = {restarting, _}} | Tail], AccRemaining) -> |
| 404 | + loop_terminate(Tail, AccRemaining); |
370 | 405 | loop_terminate([#child{pid = Pid} = Child | Tail], AccRemaining) when is_pid(Pid) -> |
371 | 406 | do_terminate(Child), |
372 | 407 | loop_terminate(Tail, [Pid | AccRemaining]); |
@@ -485,6 +520,47 @@ verify_shutdown(#child{pid = Pid, shutdown = Timeout} = _Child) -> |
485 | 520 | end |
486 | 521 | end. |
487 | 522 |
|
%% Record a restart attempt and check it against the configured restart
%% intensity.
%%
%% Returns {ok, NewState} with the updated restart history when the restart
%% is allowed, or {shutdown, State} (state unchanged) when the maximum
%% intensity has been exceeded.
add_restart(#state{} = State) ->
    Now = erlang:monotonic_time(millisecond),
    %% 'period' is in seconds, timestamps are in milliseconds.
    Cutoff = Now - State#state.period * 1000,
    %% Appending keeps the history ordered oldest-first.
    History = State#state.restarts ++ [Now],
    Attempts = State#state.restart_count + 1,
    case can_restart(State#state.intensity, Cutoff, History, Attempts) of
        {false, _Count, _Kept} ->
            % TODO: log supervisor shutdown due to maximum intensity exceeded
            {shutdown, State};
        {true, Count, Kept} ->
            {ok, State#state{restart_count = Count, restarts = Kept}}
    end.
| 539 | + |
%% Decide whether another restart is permitted.
%%
%% Intensity    - maximum number of restarts tolerated inside the window;
%%                0 means no restart is ever allowed.
%% Threshold    - timestamp marking the start of the window; restarts older
%%                than this are stale and no longer count.
%% Restarts     - restart timestamps, including the one being evaluated.
%% RestartCount - number of entries in Restarts.
%%
%% Returns {Allowed, Count, Restarts}: Allowed is a boolean, and Count and
%% Restarts describe the (possibly trimmed) history the caller should keep.
can_restart(0, _, _, _) ->
    {false, 0, []};
can_restart(_, _, _, 0) ->
    {true, 0, []};
can_restart(Intensity, Threshold, Restarts, RestartCount) when
    RestartCount >= ?STALE_RESTART_LIMIT
->
    %% Purge stale entries so a large 'intensity' cannot grow the history
    %% without bound. The verdict is taken directly from the trimmed list:
    %% re-entering this function would never terminate when
    %% ?STALE_RESTART_LIMIT or more restarts are still inside the window,
    %% because trimming would remove nothing and this clause would match again.
    {NewCount, Restarts1} = trim_expired_restarts(Threshold, lists:sort(Restarts)),
    {NewCount =< Intensity, NewCount, Restarts1};
can_restart(Intensity, Threshold, [Restart | _] = Restarts, RestartCount) when
    RestartCount >= Intensity andalso Restart < Threshold
->
    %% The limit appears to be reached, but the oldest entry is stale: drop
    %% expired restarts before judging.
    {NewCount, Restarts1} = trim_expired_restarts(Threshold, lists:sort(Restarts)),
    {NewCount =< Intensity, NewCount, Restarts1};
can_restart(Intensity, _, Restarts, RestartCount) ->
    %% Nothing to trim: allowed as long as the count does not exceed Intensity.
    {RestartCount =< Intensity, RestartCount, Restarts}.
| 558 | + |
%% Drop leading restart timestamps that are older than Cutoff and return
%% {Count, Remaining}, where Count is the length of Remaining. Trimming stops
%% at the first timestamp that is not older than Cutoff, so the input is
%% expected to be ordered oldest-first.
trim_expired_restarts(Cutoff, [Oldest | Newer] = Timestamps) ->
    case Oldest < Cutoff of
        true ->
            trim_expired_restarts(Cutoff, Newer);
        false ->
            {length(Timestamps), Timestamps}
    end;
trim_expired_restarts(_Cutoff, Timestamps) ->
    {length(Timestamps), Timestamps}.
| 563 | + |
488 | 564 | child_to_info(#child{id = Id, pid = Pid, type = Type, modules = Modules}) -> |
489 | 565 | Child = |
490 | 566 | case Pid of |
|
0 commit comments