@@ -289,6 +289,12 @@ retry_timeout() ->
289289 undefined -> 30000
290290 end .
291291
292+ retry_limit () ->
293+ case application :get_env (rabbit , khepri_leader_wait_retry_limit ) of
294+ {ok , T } -> T ;
295+ undefined -> 10
296+ end .
297+
292298% % @private
293299
294300-spec init (IsVirgin ) -> Ret when
@@ -305,11 +311,7 @@ init(IsVirgin) ->
305311 " Found the following metadata store members: ~p " , [Members ],
306312 #{domain => ? RMQLOG_DOMAIN_DB }),
307313 maybe
308- ? LOG_DEBUG (
309- " Khepri-based " ? RA_FRIENDLY_NAME " catching up on "
310- " replication to the Raft cluster leader" , [],
311- #{domain => ? RMQLOG_DOMAIN_DB }),
312- ok ?= fence (retry_timeout ()),
314+ ok ?= await_replication (),
313315 ? LOG_DEBUG (
314316 " local Khepri-based " ? RA_FRIENDLY_NAME " member is caught "
315317 " up to the Raft cluster leader" , [],
@@ -331,6 +333,24 @@ init(IsVirgin) ->
331333 end
332334 end .
333335
336+ await_replication () ->
337+ await_replication (retry_timeout (), retry_limit ()).
338+
339+ await_replication (_Timeout , 0 ) ->
340+ {error , timeout };
341+ await_replication (Timeout , Retries ) ->
342+ ? LOG_DEBUG (
343+ " Khepri-based " ? RA_FRIENDLY_NAME " waiting to catch up on replication "
344+ " to the Raft cluster leader. Waiting for ~tb ms, ~tb retries left" ,
345+ [Timeout , Retries ],
346+ #{domain => ? RMQLOG_DOMAIN_DB }),
347+ case fence (Timeout ) of
348+ ok ->
349+ ok ;
350+ {error , timeout } ->
351+ await_replication (Timeout , Retries - 1 )
352+ end .
353+
334354% % @private
335355
336356can_join_cluster (DiscoveryNode ) when is_atom (DiscoveryNode ) ->
0 commit comments