6565 alarmed ,
6666 % % is monitoring enabled? false on unsupported
6767 % % platforms
68- enabled
68+ enabled ,
69+ % % number of retries to enable monitoring if it fails
70+ % % on start-up
71+ retries ,
72+ % % Interval between retries
73+ interval
6974}).
7075
7176% %----------------------------------------------------------------------------
@@ -114,20 +119,17 @@ start_link(Args) ->
114119
%% gen_server callback. Reads the retry settings from the rabbit application
%% environment, assembles the initial state and makes the first attempt to
%% switch monitoring on through enable/1.
init([Limit]) ->
    Dir = dir(),
    %% Both keys must be set in the application environment: a missing key
    %% fails the match and aborts start-up.
    {ok, RetryCount} =
        application:get_env(rabbit, disk_monitor_failure_retries),
    {ok, RetryInterval} =
        application:get_env(rabbit, disk_monitor_failure_retry_interval),
    Initial = #state{dir          = Dir,
                     limit        = Limit,
                     min_interval = ?DEFAULT_MIN_DISK_CHECK_INTERVAL,
                     max_interval = ?DEFAULT_MAX_DISK_CHECK_INTERVAL,
                     alarmed      = false,
                     enabled      = true,
                     retries      = RetryCount,
                     interval     = RetryInterval},
    {ok, enable(Initial)}.
131133
132134handle_call (get_disk_free_limit , _From , State = # state {limit = Limit }) ->
133135 {reply , Limit , State };
@@ -161,6 +163,8 @@ handle_call(_Request, _From, State) ->
%% gen_server callback. Every cast is ignored and the state is handed back
%% unchanged.
handle_cast(_Msg, State) -> {noreply, State}.
163165
166+ handle_info (try_enable , # state {retries = Retries } = State ) ->
167+ {noreply , enable (State # state {retries = Retries - 1 })};
164168handle_info (update , State ) ->
165169 {noreply , start_timer (internal_update (State ))};
166170
@@ -246,7 +250,7 @@ interpret_limit(Absolute) ->
246250
%% Writes an info-level log entry with the disk space status string, the
%% current number of free bytes and the limit.
emit_update_info(StateStr, CurrentFree, Limit) ->
    Args = [StateStr, CurrentFree, Limit],
    rabbit_log:info("Free disk space is ~s. Free bytes: ~p. Limit: ~p~n", Args).
251255
252256start_timer (State ) ->
@@ -261,3 +265,20 @@ interval(#state{limit = Limit,
261265 max_interval = MaxInterval }) ->
262266 IdealInterval = 2 * (Actual - Limit ) / ? FAST_RATE ,
263267 trunc (erlang :max (MinInterval , erlang :min (MaxInterval , IdealInterval ))).
268+
%% Attempts to switch free disk space monitoring on.
%%
%% With no retries left the state is returned untouched and nothing is
%% scheduled. Otherwise the free disk space and total memory are probed:
%%   * if both probes yield integers, the state is marked enabled, the disk
%%     limits are applied and the periodic check timer is started;
%%   * otherwise the error is logged, a try_enable message is scheduled to
%%     this process after Interval (the delay handed to timer:send_after/3)
%%     and the state is marked disabled.
%% Returns the updated #state{}. The retries counter is not decremented
%% here; the try_enable handler does that before calling back in.
enable(#state{retries = 0} = State) ->
    State;
enable(#state{dir = Dir, interval = Interval, limit = Limit, retries = Retries}
       = State) ->
    %% The old-style catch is deliberate: a failure inside get_disk_free/1
    %% becomes an ordinary term that fails the is_integer guard and lands in
    %% the error branch together with the memory reading.
    case {catch get_disk_free(Dir),
          vm_memory_monitor:get_total_memory()} of
        {N1, N2} when is_integer(N1), is_integer(N2) ->
            rabbit_log:info("Enabling free disk space monitoring~n", []),
            %% A previous failed attempt leaves enabled = false in the state;
            %% restore it so a successful retry is not reported as disabled.
            start_timer(set_disk_limits(State#state{enabled = true}, Limit));
        Err ->
            %% Retries is an integer, so it must be printed with ~p: ~s
            %% expects chardata and makes the format call fail.
            rabbit_log:info("Free disk space monitor encountered an error "
                            "(e.g. failed to parse output from OS tools): ~p, "
                            "retries left: ~p~n",
                            [Err, Retries]),
            timer:send_after(Interval, self(), try_enable),
            State#state{enabled = false}
    end.
0 commit comments