Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit df8b91e

Browse files
Limit and filter the number of backfill points to get from the database (#13879)
There is no need to grab thousands of backfill points when we only need 5 to make the `/backfill` request with. We need to grab a few extra in case the first few aren't visible in the history. Previously, we grabbed thousands of backfill points from the database, then sorted and filtered them in the app. Fetching the 4.6k backfill points for `#matrix:matrix.org` from the database takes ~50ms - ~570ms so it's not like this saves a lot of time 🤷. But it might save us more time now that `get_backfill_points_in_room`/`get_insertion_event_backward_extremities_in_room` are more complicated after #13635 This PR moves the filtering and limiting to the SQL query so we just have less data to work with in the first place. Part of #13356
1 parent d768c50 commit df8b91e

File tree

4 files changed

+198
-82
lines changed

4 files changed

+198
-82
lines changed

changelog.d/13879.misc

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Only pull relevant backfill points from the database based on the current depth and limit (instead of all) every time we want to `/backfill`.

synapse/handlers/federation.py

Lines changed: 61 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
from unpaddedbase64 import decode_base64
3939

4040
from synapse import event_auth
41-
from synapse.api.constants import EventContentFields, EventTypes, Membership
41+
from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
4242
from synapse.api.errors import (
4343
AuthError,
4444
CodeMessageException,
@@ -211,7 +211,7 @@ async def _maybe_backfill_inner(
211211
current_depth: int,
212212
limit: int,
213213
*,
214-
processing_start_time: int,
214+
processing_start_time: Optional[int],
215215
) -> bool:
216216
"""
217217
Checks whether the `current_depth` is at or approaching any backfill
@@ -223,20 +223,36 @@ async def _maybe_backfill_inner(
223223
room_id: The room to backfill in.
224224
current_depth: The depth to check at for any upcoming backfill points.
225225
limit: The max number of events to request from the remote federated server.
226-
processing_start_time: The time when `maybe_backfill` started
227-
processing. Only used for timing.
226+
processing_start_time: The time when `maybe_backfill` started processing.
227+
Only used for timing. If `None`, no timing observation will be made.
228228
"""
229229
backwards_extremities = [
230230
_BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY)
231-
for event_id, depth in await self.store.get_backfill_points_in_room(room_id)
231+
for event_id, depth in await self.store.get_backfill_points_in_room(
232+
room_id=room_id,
233+
current_depth=current_depth,
234+
# We only need to end up with 5 extremities combined with the
235+
# insertion event extremities to make the `/backfill` request
236+
# but fetch an order of magnitude more to make sure there is
237+
# enough even after we filter them by whether visible in the
238+
# history. This isn't fool-proof as all backfill points within
239+
# our limit could be filtered out but seems like a good amount
240+
# to try with at least.
241+
limit=50,
242+
)
232243
]
233244

234245
insertion_events_to_be_backfilled: List[_BackfillPoint] = []
235246
if self.hs.config.experimental.msc2716_enabled:
236247
insertion_events_to_be_backfilled = [
237248
_BackfillPoint(event_id, depth, _BackfillPointType.INSERTION_PONT)
238249
for event_id, depth in await self.store.get_insertion_event_backward_extremities_in_room(
239-
room_id
250+
room_id=room_id,
251+
current_depth=current_depth,
252+
# We only need to end up with 5 extremities combined with
253+
# the backfill points to make the `/backfill` request ...
254+
# (see the other comment above for more context).
255+
limit=50,
240256
)
241257
]
242258
logger.debug(
@@ -245,10 +261,6 @@ async def _maybe_backfill_inner(
245261
insertion_events_to_be_backfilled,
246262
)
247263

248-
if not backwards_extremities and not insertion_events_to_be_backfilled:
249-
logger.debug("Not backfilling as no extremeties found.")
250-
return False
251-
252264
# we now have a list of potential places to backpaginate from. We prefer to
253265
# start with the most recent (ie, max depth), so let's sort the list.
254266
sorted_backfill_points: List[_BackfillPoint] = sorted(
@@ -269,6 +281,33 @@ async def _maybe_backfill_inner(
269281
sorted_backfill_points,
270282
)
271283

284+
# If we have no backfill points lower than the `current_depth` then
285+
# either we can a) bail or b) still attempt to backfill. We opt to try
286+
# backfilling anyway just in case we do get relevant events.
287+
if not sorted_backfill_points and current_depth != MAX_DEPTH:
288+
logger.debug(
289+
"_maybe_backfill_inner: all backfill points are *after* current depth. Trying again with later backfill points."
290+
)
291+
return await self._maybe_backfill_inner(
292+
room_id=room_id,
293+
# We use `MAX_DEPTH` so that we find all backfill points next
294+
# time (all events are below the `MAX_DEPTH`)
295+
current_depth=MAX_DEPTH,
296+
limit=limit,
297+
# We don't want to start another timing observation from this
298+
# nested recursive call. The top-most call can record the time
299+
# overall otherwise the smaller one will throw off the results.
300+
processing_start_time=None,
301+
)
302+
303+
# Even after recursing with `MAX_DEPTH`, we didn't find any
304+
# backward extremities to backfill from.
305+
if not sorted_backfill_points:
306+
logger.debug(
307+
"_maybe_backfill_inner: Not backfilling as no backward extremeties found."
308+
)
309+
return False
310+
272311
# If we're approaching an extremity we trigger a backfill, otherwise we
273312
# no-op.
274313
#
@@ -278,47 +317,16 @@ async def _maybe_backfill_inner(
278317
# chose more than one times the limit in case of failure, but choosing a
279318
# much larger factor will result in triggering a backfill request much
280319
# earlier than necessary.
281-
#
282-
# XXX: shouldn't we do this *after* the filter by depth below? Again, we don't
283-
# care about events that have happened after our current position.
284-
#
285-
max_depth = sorted_backfill_points[0].depth
286-
if current_depth - 2 * limit > max_depth:
320+
max_depth_of_backfill_points = sorted_backfill_points[0].depth
321+
if current_depth - 2 * limit > max_depth_of_backfill_points:
287322
logger.debug(
288323
"Not backfilling as we don't need to. %d < %d - 2 * %d",
289-
max_depth,
324+
max_depth_of_backfill_points,
290325
current_depth,
291326
limit,
292327
)
293328
return False
294329

295-
# We ignore extremities that have a greater depth than our current depth
296-
# as:
297-
# 1. we don't really care about getting events that have happened
298-
# after our current position; and
299-
# 2. we have likely previously tried and failed to backfill from that
300-
# extremity, so to avoid getting "stuck" requesting the same
301-
# backfill repeatedly we drop those extremities.
302-
#
303-
# However, we need to check that the filtered extremities are non-empty.
304-
# If they are empty then either we can a) bail or b) still attempt to
305-
# backfill. We opt to try backfilling anyway just in case we do get
306-
# relevant events.
307-
#
308-
filtered_sorted_backfill_points = [
309-
t for t in sorted_backfill_points if t.depth <= current_depth
310-
]
311-
if filtered_sorted_backfill_points:
312-
logger.debug(
313-
"_maybe_backfill_inner: backfill points before current depth: %s",
314-
filtered_sorted_backfill_points,
315-
)
316-
sorted_backfill_points = filtered_sorted_backfill_points
317-
else:
318-
logger.debug(
319-
"_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway."
320-
)
321-
322330
# For performance's sake, we only want to paginate from a particular extremity
323331
# if we can actually see the events we'll get. Otherwise, we'd just spend a lot
324332
# of resources to get redacted events. We check each extremity in turn and
@@ -452,10 +460,15 @@ async def try_backfill(domains: Collection[str]) -> bool:
452460

453461
return False
454462

455-
processing_end_time = self.clock.time_msec()
456-
backfill_processing_before_timer.observe(
457-
(processing_end_time - processing_start_time) / 1000
458-
)
463+
# If we have the `processing_start_time`, then we can make an
464+
# observation. We wouldn't have the `processing_start_time` in the case
465+
# where `_maybe_backfill_inner` is recursively called to find any
466+
# backfill points regardless of `current_depth`.
467+
if processing_start_time is not None:
468+
processing_end_time = self.clock.time_msec()
469+
backfill_processing_before_timer.observe(
470+
(processing_end_time - processing_start_time) / 1000
471+
)
459472

460473
success = await try_backfill(likely_domains)
461474
if success:

synapse/storage/databases/main/event_federation.py

Lines changed: 78 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -726,17 +726,35 @@ def _get_auth_chain_difference_txn(
726726
async def get_backfill_points_in_room(
727727
self,
728728
room_id: str,
729+
current_depth: int,
730+
limit: int,
729731
) -> List[Tuple[str, int]]:
730732
"""
731-
Gets the oldest events(backwards extremities) in the room along with the
732-
approximate depth. Sorted by depth, highest to lowest (descending).
733+
Get the backward extremities to backfill from in the room along with the
734+
approximate depth.
735+
736+
Only returns events that are at a depth lower than or
737+
equal to the `current_depth`. Sorted by depth, highest to lowest (descending)
738+
so the closest events to the `current_depth` are first in the list.
739+
740+
We ignore extremities that are newer than the user's current scroll position
741+
(ie, those with depth greater than `current_depth`) as:
742+
1. we don't really care about getting events that have happened
743+
after our current position; and
744+
2. by the nature of paginating and scrolling back, we have likely
745+
previously tried and failed to backfill from that extremity, so
746+
to avoid getting "stuck" requesting the same backfill repeatedly
747+
we drop those extremities.
733748
734749
Args:
735750
room_id: Room where we want to find the oldest events
751+
current_depth: The depth at the user's current scrollback position
752+
limit: The max number of backfill points to return
736753
737754
Returns:
738755
List of (event_id, depth) tuples. Sorted by depth, highest to lowest
739-
(descending)
756+
(descending) so the closest events to the `current_depth` are first
757+
in the list.
740758
"""
741759

742760
def get_backfill_points_in_room_txn(
@@ -784,6 +802,18 @@ def get_backfill_points_in_room_txn(
784802
* necessarily safe to assume that it will have been completed.
785803
*/
786804
AND edge.is_state is ? /* False */
805+
/**
806+
* We only want backwards extremities that are older than or at
807+
* the same position of the given `current_depth` (where older
808+
* means less than the given depth) because we're looking backwards
809+
* from the `current_depth` when backfilling.
810+
*
811+
* current_depth (ignore events that come after this, ignore 2-4)
812+
* |
813+
* ▼
814+
* <oldest-in-time> [0]<--[1]<--[2]<--[3]<--[4] <newest-in-time>
815+
*/
816+
AND event.depth <= ? /* current_depth */
787817
/**
788818
* Exponential back-off (up to the upper bound) so we don't retry the
789819
* same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc.
@@ -798,11 +828,13 @@ def get_backfill_points_in_room_txn(
798828
OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */)
799829
)
800830
/**
801-
* Sort from highest to the lowest depth. Then tie-break on
802-
* alphabetical order of the event_ids so we get a consistent
803-
* ordering which is nice when asserting things in tests.
831+
* Sort from highest (closest to the `current_depth`) to the lowest depth
832+
* because the closest are most relevant to backfill from first.
833+
* Then tie-break on alphabetical order of the event_ids so we get a
834+
* consistent ordering which is nice when asserting things in tests.
804835
*/
805836
ORDER BY event.depth DESC, backward_extrem.event_id DESC
837+
LIMIT ?
806838
"""
807839

808840
if isinstance(self.database_engine, PostgresEngine):
@@ -817,9 +849,11 @@ def get_backfill_points_in_room_txn(
817849
(
818850
room_id,
819851
False,
852+
current_depth,
820853
self._clock.time_msec(),
821854
1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS,
822855
1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS,
856+
limit,
823857
),
824858
)
825859

@@ -835,18 +869,34 @@ def get_backfill_points_in_room_txn(
835869
async def get_insertion_event_backward_extremities_in_room(
836870
self,
837871
room_id: str,
872+
current_depth: int,
873+
limit: int,
838874
) -> List[Tuple[str, int]]:
839875
"""
840876
Get the insertion events we know about that we haven't backfilled yet
841-
along with the approximate depth. Sorted by depth, highest to lowest
842-
(descending).
877+
along with the approximate depth. Only returns insertion events that are
878+
at a depth lower than or equal to the `current_depth`. Sorted by depth,
879+
highest to lowest (descending) so the closest events to the
880+
`current_depth` are first in the list.
881+
882+
We ignore insertion events that are newer than the user's current scroll
883+
position (ie, those with depth greater than `current_depth`) as:
884+
1. we don't really care about getting events that have happened
885+
after our current position; and
886+
2. by the nature of paginating and scrolling back, we have likely
887+
previously tried and failed to backfill from that insertion event, so
888+
to avoid getting "stuck" requesting the same backfill repeatedly
889+
we drop those insertion event.
843890
844891
Args:
845892
room_id: Room where we want to find the oldest events
893+
current_depth: The depth at the user's current scrollback position
894+
limit: The max number of insertion event extremities to return
846895
847896
Returns:
848897
List of (event_id, depth) tuples. Sorted by depth, highest to lowest
849-
(descending)
898+
(descending) so the closest events to the `current_depth` are first
899+
in the list.
850900
"""
851901

852902
def get_insertion_event_backward_extremities_in_room_txn(
@@ -869,6 +919,18 @@ def get_insertion_event_backward_extremities_in_room_txn(
869919
AND failed_backfill_attempt_info.event_id = insertion_event_extremity.event_id
870920
WHERE
871921
insertion_event_extremity.room_id = ?
922+
/**
923+
* We only want extremities that are older than or at
924+
* the same position of the given `current_depth` (where older
925+
* means less than the given depth) because we're looking backwards
926+
* from the `current_depth` when backfilling.
927+
*
928+
* current_depth (ignore events that come after this, ignore 2-4)
929+
* |
930+
* ▼
931+
* <oldest-in-time> [0]<--[1]<--[2]<--[3]<--[4] <newest-in-time>
932+
*/
933+
AND event.depth <= ? /* current_depth */
872934
/**
873935
* Exponential back-off (up to the upper bound) so we don't retry the
874936
* same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc
@@ -883,11 +945,13 @@ def get_insertion_event_backward_extremities_in_room_txn(
883945
OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */)
884946
)
885947
/**
886-
* Sort from highest to the lowest depth. Then tie-break on
887-
* alphabetical order of the event_ids so we get a consistent
888-
* ordering which is nice when asserting things in tests.
948+
* Sort from highest (closest to the `current_depth`) to the lowest depth
949+
* because the closest are most relevant to backfill from first.
950+
* Then tie-break on alphabetical order of the event_ids so we get a
951+
* consistent ordering which is nice when asserting things in tests.
889952
*/
890953
ORDER BY event.depth DESC, insertion_event_extremity.event_id DESC
954+
LIMIT ?
891955
"""
892956

893957
if isinstance(self.database_engine, PostgresEngine):
@@ -901,9 +965,11 @@ def get_insertion_event_backward_extremities_in_room_txn(
901965
sql % (least_function,),
902966
(
903967
room_id,
968+
current_depth,
904969
self._clock.time_msec(),
905970
1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS,
906971
1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS,
972+
limit,
907973
),
908974
)
909975
return cast(List[Tuple[str, int]], txn.fetchall())

0 commit comments

Comments (0)