@@ -920,15 +920,26 @@ async def backfill(self, dest, room_id, limit, extremities):
 
         return events
 
-    async def maybe_backfill(self, room_id, current_depth):
+    async def maybe_backfill(
+        self, room_id: str, current_depth: int, limit: int
+    ) -> bool:
         """Checks the database to see if we should backfill before paginating,
         and if so do.
+
+        Args:
+            room_id
+            current_depth: The depth from which we're paginating from. This is
+                used to decide if we should backfill and what extremities to
+                use.
+            limit: The number of events that the pagination request will
+                return. This is used as part of the heuristic to decide if we
+                should back paginate.
         """
         extremities = await self.store.get_oldest_events_with_depth_in_room(room_id)
 
         if not extremities:
             logger.debug("Not backfilling as no extremeties found.")
-            return
+            return False
 
         # We only want to paginate if we can actually see the events we'll get,
         # as otherwise we'll just spend a lot of resources to get redacted
@@ -981,16 +992,54 @@ async def maybe_backfill(self, room_id, current_depth):
         sorted_extremeties_tuple = sorted(extremities.items(), key=lambda e: -int(e[1]))
         max_depth = sorted_extremeties_tuple[0][1]
 
+        # If we're approaching an extremity we trigger a backfill, otherwise we
+        # no-op.
+        #
+        # We chose twice the limit here as then clients paginating backwards
+        # will send pagination requests that trigger backfill at least twice
+        # using the most recent extremity before it gets removed (see below). We
+        # chose more than one times the limit in case of failure, but choosing a
+        # much larger factor will result in triggering a backfill request much
+        # earlier than necessary.
+        if current_depth - 2 * limit > max_depth:
+            logger.debug(
+                "Not backfilling as we don't need to. %d < %d - 2 * %d",
+                max_depth,
+                current_depth,
+                limit,
+            )
+            return False
+
+        logger.debug(
+            "room_id: %s, backfill: current_depth: %s, max_depth: %s, extrems: %s",
+            room_id,
+            current_depth,
+            max_depth,
+            sorted_extremeties_tuple,
+        )
+
+        # We ignore extremities that have a greater depth than our current depth
+        # as:
+        #   1. we don't really care about getting events that have happened
+        #      before our current position; and
+        #   2. we have likely previously tried and failed to backfill from that
+        #      extremity, so to avoid getting "stuck" requesting the same
+        #      backfill repeatedly we drop those extremities.
+        filtered_sorted_extremeties_tuple = [
+            t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth
+        ]
+        # However, we need to check that the filtered extremities are non-empty.
+        # If they are empty then either we can a) bail or b) still attempt to
+        # backfill. We opt to try backfilling anyway just in case we do get
+        # relevant events.
+        if filtered_sorted_extremeties_tuple:
+            sorted_extremeties_tuple = filtered_sorted_extremeties_tuple
+
         # We don't want to specify too many extremities as it causes the backfill
         # request URI to be too long.
         extremities = dict(sorted_extremeties_tuple[:5])
 
-        if current_depth > max_depth:
-            logger.debug(
-                "Not backfilling as we don't need to. %d < %d", max_depth, current_depth
-            )
-            return
-
         # Now we need to decide which hosts to hit first.
 
         # First we try hosts that are already in the room
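
As a quick way to sanity-check the new heuristic outside of Synapse, here is a minimal standalone sketch of the logic the added lines implement: only trigger a backfill once the pagination point is within two pages (2 * limit) of the deepest extremity, drop extremities deeper than the current position, and cap the request at five extremities. The function names (`should_backfill`, `pick_extremities`) and the sample values are invented for illustration and are not part of this patch; the real handler works against the event store and returns early rather than returning a list.

```python
from typing import Dict, List, Tuple


def should_backfill(current_depth: int, limit: int, max_depth: int) -> bool:
    # Trigger a backfill only once the pagination point is within
    # 2 * limit of the deepest extremity; otherwise no-op.
    return current_depth - 2 * limit <= max_depth


def pick_extremities(
    extremities: Dict[str, int], current_depth: int
) -> List[Tuple[str, int]]:
    # Sort extremities deepest-first, mirroring sorted_extremeties_tuple.
    sorted_extrems = sorted(extremities.items(), key=lambda e: -int(e[1]))
    # Drop extremities deeper than our current position: they are either
    # irrelevant or ones we previously failed to backfill from.
    filtered = [t for t in sorted_extrems if int(t[1]) <= current_depth]
    # If everything was filtered out, fall back to the unfiltered list so we
    # still attempt a backfill, mirroring the "try anyway" choice above.
    chosen = filtered or sorted_extrems
    # Cap the number of extremities so the backfill request URI stays short.
    return chosen[:5]


# Paginating at depth 120 with a page size of 10: an extremity at depth 105
# is within 2 * limit of the pagination point, so a backfill is triggered.
assert should_backfill(current_depth=120, limit=10, max_depth=105)
assert not should_backfill(current_depth=200, limit=10, max_depth=105)
# "$c" sits deeper than the current position, so it is dropped.
print(pick_extremities({"$a": 105, "$b": 90, "$c": 130}, current_depth=120))
```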