Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit f63d4a3

Browse files
author
Mathieu Velten
authored
Regularly try to wake up dests instead of waiting for next PDU/EDU (#15743)
1 parent d939120 commit f63d4a3

File tree

3 files changed

+26
-31
lines changed

3 files changed

+26
-31
lines changed

changelog.d/15743.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Regularly try to send transactions to other servers after they failed instead of waiting for a new event to be available before trying.

synapse/federation/sender/__init__.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,8 @@
109109
110110
If a remote server is unreachable over federation, we back off from that server,
111111
with an exponentially-increasing retry interval.
112-
Whilst we don't automatically retry after the interval, we prevent making new attempts
113-
until such time as the back-off has cleared.
114-
Once the back-off is cleared and a new PDU or EDU arrives for transmission, the transmission
115-
loop resumes and empties the queue by making federation requests.
112+
We automatically retry after the retry interval expires (roughly, the logic to do so
113+
being triggered every minute).
116114
117115
If the backoff grows too large (> 1 hour), the in-memory queue is emptied (to prevent
118116
unbounded growth) and Catch-Up Mode is entered.
@@ -145,7 +143,6 @@
145143
from typing_extensions import Literal
146144

147145
from twisted.internet import defer
148-
from twisted.internet.interfaces import IDelayedCall
149146

150147
import synapse.metrics
151148
from synapse.api.presence import UserPresenceState
@@ -184,14 +181,18 @@
184181
"Total number of PDUs queued for sending across all destinations",
185182
)
186183

187-
# Time (in s) after Synapse's startup that we will begin to wake up destinations
188-
# that have catch-up outstanding.
189-
CATCH_UP_STARTUP_DELAY_SEC = 15
184+
# Time (in s) to wait before trying to wake up destinations that have
185+
# catch-up outstanding. This will also be the delay applied at startup
186+
# before trying the same.
187+
# Please note that rate limiting still applies, so while the loop is
188+
# executed every X seconds the destinations may not be wake up because
189+
# they are being rate limited following previous attempt failures.
190+
WAKEUP_RETRY_PERIOD_SEC = 60
190191

191192
# Time (in s) to wait in between waking up each destination, i.e. one destination
192-
# will be woken up every <x> seconds after Synapse's startup until we have woken
193-
# every destination has outstanding catch-up.
194-
CATCH_UP_STARTUP_INTERVAL_SEC = 5
193+
# will be woken up every <x> seconds until we have woken every destination
194+
# has outstanding catch-up.
195+
WAKEUP_INTERVAL_BETWEEN_DESTINATIONS_SEC = 5
195196

196197

197198
class AbstractFederationSender(metaclass=abc.ABCMeta):
@@ -415,12 +416,10 @@ def __init__(self, hs: "HomeServer"):
415416
/ hs.config.ratelimiting.federation_rr_transactions_per_room_per_second
416417
)
417418

418-
# wake up destinations that have outstanding PDUs to be caught up
419-
self._catchup_after_startup_timer: Optional[
420-
IDelayedCall
421-
] = self.clock.call_later(
422-
CATCH_UP_STARTUP_DELAY_SEC,
419+
# Regularly wake up destinations that have outstanding PDUs to be caught up
420+
self.clock.looping_call(
423421
run_as_background_process,
422+
WAKEUP_RETRY_PERIOD_SEC * 1000.0,
424423
"wake_destinations_needing_catchup",
425424
self._wake_destinations_needing_catchup,
426425
)
@@ -966,7 +965,6 @@ async def _wake_destinations_needing_catchup(self) -> None:
966965

967966
if not destinations_to_wake:
968967
# finished waking all destinations!
969-
self._catchup_after_startup_timer = None
970968
break
971969

972970
last_processed = destinations_to_wake[-1]
@@ -983,4 +981,4 @@ async def _wake_destinations_needing_catchup(self) -> None:
983981
last_processed,
984982
)
985983
self.wake_destination(destination)
986-
await self.clock.sleep(CATCH_UP_STARTUP_INTERVAL_SEC)
984+
await self.clock.sleep(WAKEUP_INTERVAL_BETWEEN_DESTINATIONS_SEC)

tests/federation/test_federation_catch_up.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -431,28 +431,24 @@ def test_catch_up_on_synapse_startup(self) -> None:
431431
# ACT: call _wake_destinations_needing_catchup
432432

433433
# patch wake_destination to just count the destinations instead
434-
woken = []
434+
woken = set()
435435

436436
def wake_destination_track(destination: str) -> None:
437-
woken.append(destination)
437+
woken.add(destination)
438438

439439
self.federation_sender.wake_destination = wake_destination_track # type: ignore[assignment]
440440

441-
# cancel the pre-existing timer for _wake_destinations_needing_catchup
442-
# this is because we are calling it manually rather than waiting for it
443-
# to be called automatically
444-
assert self.federation_sender._catchup_after_startup_timer is not None
445-
self.federation_sender._catchup_after_startup_timer.cancel()
446-
447-
self.get_success(
448-
self.federation_sender._wake_destinations_needing_catchup(), by=5.0
449-
)
441+
# We wait quite long so that all dests can be woken up, since there is a delay
442+
# between them.
443+
self.pump(by=5.0)
450444

451445
# ASSERT (_wake_destinations_needing_catchup):
452446
# - all remotes are woken up, save for zzzerver
453447
self.assertNotIn("zzzerver", woken)
454-
# - all destinations are woken exactly once; they appear once in woken.
455-
self.assertCountEqual(woken, server_names[:-1])
448+
# - all destinations are woken, potentially more than once, since the
449+
# wake up is called regularly and we don't ack in this test that a transaction
450+
# has been successfully sent.
451+
self.assertCountEqual(woken, set(server_names[:-1]))
456452

457453
def test_not_latest_event(self) -> None:
458454
"""Test that we send the latest event in the room even if its not ours."""

0 commit comments

Comments
 (0)