3838from  unpaddedbase64  import  decode_base64 
3939
4040from  synapse  import  event_auth 
41- from  synapse .api .constants  import  EventContentFields , EventTypes , Membership 
41+ from  synapse .api .constants  import  MAX_DEPTH ,  EventContentFields , EventTypes , Membership 
4242from  synapse .api .errors  import  (
4343    AuthError ,
4444    CodeMessageException ,
@@ -211,7 +211,7 @@ async def _maybe_backfill_inner(
211211        current_depth : int ,
212212        limit : int ,
213213        * ,
214-         processing_start_time : int ,
214+         processing_start_time : Optional [ int ] ,
215215    ) ->  bool :
216216        """ 
217217        Checks whether the `current_depth` is at or approaching any backfill 
@@ -223,20 +223,36 @@ async def _maybe_backfill_inner(
223223            room_id: The room to backfill in. 
224224            current_depth: The depth to check at for any upcoming backfill points. 
225225            limit: The max number of events to request from the remote federated server. 
226-             processing_start_time: The time when `maybe_backfill` started 
227-                 processing.  Only used for timing. 
226+             processing_start_time: The time when `maybe_backfill` started processing.  
227+                 Only used for timing. If `None`, no timing observation will be made . 
228228        """ 
229229        backwards_extremities  =  [
230230            _BackfillPoint (event_id , depth , _BackfillPointType .BACKWARDS_EXTREMITY )
231-             for  event_id , depth  in  await  self .store .get_backfill_points_in_room (room_id )
231+             for  event_id , depth  in  await  self .store .get_backfill_points_in_room (
232+                 room_id = room_id ,
233+                 current_depth = current_depth ,
234+                 # We only need to end up with 5 extremities combined with the 
235+                 # insertion event extremities to make the `/backfill` request 
236+                 # but fetch an order of magnitude more to make sure there are 
237+                 # enough even after we filter them by whether they are visible in the 
238+                 # history. This isn't fool-proof as all backfill points within 
239+                 # our limit could be filtered out but seems like a good amount 
240+                 # to try with at least. 
241+                 limit = 50 ,
242+             )
232243        ]
233244
234245        insertion_events_to_be_backfilled : List [_BackfillPoint ] =  []
235246        if  self .hs .config .experimental .msc2716_enabled :
236247            insertion_events_to_be_backfilled  =  [
237248                _BackfillPoint (event_id , depth , _BackfillPointType .INSERTION_PONT )
238249                for  event_id , depth  in  await  self .store .get_insertion_event_backward_extremities_in_room (
239-                     room_id 
250+                     room_id = room_id ,
251+                     current_depth = current_depth ,
252+                     # We only need to end up with 5 extremities combined with 
253+                     # the backfill points to make the `/backfill` request ... 
254+                     # (see the other comment above for more context). 
255+                     limit = 50 ,
240256                )
241257            ]
242258        logger .debug (
@@ -245,10 +261,6 @@ async def _maybe_backfill_inner(
245261            insertion_events_to_be_backfilled ,
246262        )
247263
248-         if  not  backwards_extremities  and  not  insertion_events_to_be_backfilled :
249-             logger .debug ("Not backfilling as no extremeties found." )
250-             return  False 
251- 
252264        # we now have a list of potential places to backpaginate from. We prefer to 
253265        # start with the most recent (ie, max depth), so let's sort the list. 
254266        sorted_backfill_points : List [_BackfillPoint ] =  sorted (
@@ -269,6 +281,33 @@ async def _maybe_backfill_inner(
269281            sorted_backfill_points ,
270282        )
271283
284+         # If we have no backfill points lower than the `current_depth` then 
285+         # either we can a) bail or b) still attempt to backfill. We opt to try 
286+         # backfilling anyway just in case we do get relevant events. 
287+         if  not  sorted_backfill_points  and  current_depth  !=  MAX_DEPTH :
288+             logger .debug (
289+                 "_maybe_backfill_inner: all backfill points are *after* current depth. Trying again with later backfill points." 
290+             )
291+             return  await  self ._maybe_backfill_inner (
292+                 room_id = room_id ,
293+                 # We use `MAX_DEPTH` so that we find all backfill points next 
294+                 # time (all events are below the `MAX_DEPTH`) 
295+                 current_depth = MAX_DEPTH ,
296+                 limit = limit ,
297+                 # We don't want to start another timing observation from this 
298+                 # nested recursive call. The top-most call records the overall 
299+                 # time; a second, smaller observation would throw off the results. 
300+                 processing_start_time = None ,
301+             )
302+ 
303+         # Even after recursing with `MAX_DEPTH`, we didn't find any 
304+         # backward extremities to backfill from. 
305+         if  not  sorted_backfill_points :
306+             logger .debug (
307+                 "_maybe_backfill_inner: Not backfilling as no backward extremeties found." 
308+             )
309+             return  False 
310+ 
272311        # If we're approaching an extremity we trigger a backfill, otherwise we 
273312        # no-op. 
274313        # 
@@ -278,47 +317,16 @@ async def _maybe_backfill_inner(
278317        # chose more than one times the limit in case of failure, but choosing a 
279318        # much larger factor will result in triggering a backfill request much 
280319        # earlier than necessary. 
281-         # 
282-         # XXX: shouldn't we do this *after* the filter by depth below? Again, we don't 
283-         # care about events that have happened after our current position. 
284-         # 
285-         max_depth  =  sorted_backfill_points [0 ].depth 
286-         if  current_depth  -  2  *  limit  >  max_depth :
320+         max_depth_of_backfill_points  =  sorted_backfill_points [0 ].depth 
321+         if  current_depth  -  2  *  limit  >  max_depth_of_backfill_points :
287322            logger .debug (
288323                "Not backfilling as we don't need to. %d < %d - 2 * %d" ,
289-                 max_depth ,
324+                 max_depth_of_backfill_points ,
290325                current_depth ,
291326                limit ,
292327            )
293328            return  False 
294329
295-         # We ignore extremities that have a greater depth than our current depth 
296-         # as: 
297-         #    1. we don't really care about getting events that have happened 
298-         #       after our current position; and 
299-         #    2. we have likely previously tried and failed to backfill from that 
300-         #       extremity, so to avoid getting "stuck" requesting the same 
301-         #       backfill repeatedly we drop those extremities. 
302-         # 
303-         # However, we need to check that the filtered extremities are non-empty. 
304-         # If they are empty then either we can a) bail or b) still attempt to 
305-         # backfill. We opt to try backfilling anyway just in case we do get 
306-         # relevant events. 
307-         # 
308-         filtered_sorted_backfill_points  =  [
309-             t  for  t  in  sorted_backfill_points  if  t .depth  <=  current_depth 
310-         ]
311-         if  filtered_sorted_backfill_points :
312-             logger .debug (
313-                 "_maybe_backfill_inner: backfill points before current depth: %s" ,
314-                 filtered_sorted_backfill_points ,
315-             )
316-             sorted_backfill_points  =  filtered_sorted_backfill_points 
317-         else :
318-             logger .debug (
319-                 "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway." 
320-             )
321- 
322330        # For performance's sake, we only want to paginate from a particular extremity 
323331        # if we can actually see the events we'll get. Otherwise, we'd just spend a lot 
324332        # of resources to get redacted events. We check each extremity in turn and 
@@ -452,10 +460,15 @@ async def try_backfill(domains: Collection[str]) -> bool:
452460
453461            return  False 
454462
455-         processing_end_time  =  self .clock .time_msec ()
456-         backfill_processing_before_timer .observe (
457-             (processing_end_time  -  processing_start_time ) /  1000 
458-         )
463+         # If we have the `processing_start_time`, then we can make an 
464+         # observation. We wouldn't have the `processing_start_time` in the case 
465+         # where `_maybe_backfill_inner` is recursively called to find any 
466+         # backfill points regardless of `current_depth`. 
467+         if  processing_start_time  is  not None :
468+             processing_end_time  =  self .clock .time_msec ()
469+             backfill_processing_before_timer .observe (
470+                 (processing_end_time  -  processing_start_time ) /  1000 
471+             )
459472
460473        success  =  await  try_backfill (likely_domains )
461474        if  success :
0 commit comments