140140 Iterable ,
141141 List ,
142142 Optional ,
143- Set ,
144143 Tuple ,
145144)
146145
170169 run_as_background_process ,
171170 wrap_as_background_process ,
172171)
173- from synapse .types import JsonDict , ReadReceipt , RoomStreamToken , StrCollection
172+ from synapse .types import (
173+ JsonDict ,
174+ ReadReceipt ,
175+ RoomStreamToken ,
176+ StrCollection ,
177+ get_domain_from_id ,
178+ )
174179from synapse .util import Clock
175180from synapse .util .metrics import Measure
176181from synapse .util .retryutils import filter_destinations_by_retry_limiter
@@ -297,12 +302,10 @@ class _DestinationWakeupQueue:
297302 # being woken up.
298303 _MAX_TIME_IN_QUEUE = 30.0
299304
300- # The maximum duration in seconds between waking up consecutive destination
301- # queues.
302- _MAX_DELAY = 0.1
303-
304305 sender : "FederationSender" = attr .ib ()
305306 clock : Clock = attr .ib ()
307+ max_delay_s : int = attr .ib ()
308+
306309 queue : "OrderedDict[str, Literal[None]]" = attr .ib (factory = OrderedDict )
307310 processing : bool = attr .ib (default = False )
308311
@@ -332,7 +335,7 @@ async def _handle(self) -> None:
332335 # We also add an upper bound to the delay, to gracefully handle the
333336 # case where the queue only has a few entries in it.
334337 current_sleep_seconds = min (
335- self ._MAX_DELAY , self ._MAX_TIME_IN_QUEUE / len (self .queue )
338+ self .max_delay_s , self ._MAX_TIME_IN_QUEUE / len (self .queue )
336339 )
337340
338341 while self .queue :
@@ -416,19 +419,14 @@ def __init__(self, hs: "HomeServer"):
416419 self ._is_processing = False
417420 self ._last_poked_id = - 1
418421
419- # map from room_id to a set of PerDestinationQueues which we believe are
420- # awaiting a call to flush_read_receipts_for_room. The presence of an entry
421- # here for a given room means that we are rate-limiting RR flushes to that room,
422- # and that there is a pending call to _flush_rrs_for_room in the system.
423- self ._queues_awaiting_rr_flush_by_room : Dict [str , Set [PerDestinationQueue ]] = {}
422+ self ._external_cache = hs .get_external_cache ()
424423
425- self ._rr_txn_interval_per_room_ms = (
426- 1000.0
427- / hs .config .ratelimiting .federation_rr_transactions_per_room_per_second
424+ rr_txn_interval_per_room_s = (
425+ 1.0 / hs .config .ratelimiting .federation_rr_transactions_per_room_per_second
426+ )
427+ self ._destination_wakeup_queue = _DestinationWakeupQueue (
428+ self , self .clock , max_delay_s = rr_txn_interval_per_room_s
428429 )
429-
430- self ._external_cache = hs .get_external_cache ()
431- self ._destination_wakeup_queue = _DestinationWakeupQueue (self , self .clock )
432430
433431 # Regularly wake up destinations that have outstanding PDUs to be caught up
434432 self .clock .looping_call_now (
@@ -745,37 +743,48 @@ async def send_read_receipt(self, receipt: ReadReceipt) -> None:
745743
746744 # Some background on the rate-limiting going on here.
747745 #
748- # It turns out that if we attempt to send out RRs as soon as we get them from
749- # a client, then we end up trying to do several hundred Hz of federation
750- # transactions. (The number of transactions scales as O(N^2) on the size of a
751- # room, since in a large room we have both more RRs coming in, and more servers
752- # to send them to.)
746+ # It turns out that if we attempt to send out RRs as soon as we get them
747+ # from a client, then we end up trying to do several hundred Hz of
748+ # federation transactions. (The number of transactions scales as O(N^2)
749+ # on the size of a room, since in a large room we have both more RRs
750+ # coming in, and more servers to send them to.)
753751 #
754- # This leads to a lot of CPU load, and we end up getting behind. The solution
755- # currently adopted is as follows:
752+ # This leads to a lot of CPU load, and we end up getting behind. The
753+ # solution currently adopted is to differentiate between receipts and
754+ # destinations we should immediately send to, and those we can trickle
755+ # the receipts to.
756756 #
757- # The first receipt in a given room is sent out immediately, at time T0. Any
758- # further receipts are, in theory, batched up for N seconds, where N is calculated
759- # based on the number of servers in the room to achieve a transaction frequency
760- # of around 50Hz. So, for example, if there were 100 servers in the room, then
761- # N would be 100 / 50Hz = 2 seconds.
757+ # The current logic is to send receipts out immediately if:
758+ # - the room is "small", i.e. there's only N servers to send receipts
759+ # to, and so sending out the receipts immediately doesn't cause too
760+ # much load; or
761+ # - the receipt is for an event that happened recently, as users
762+ # notice if receipts are delayed when they know other users are
763+ # currently reading the room; or
764+ # - the receipt is being sent to the server that sent the event, so
765+ # that users see receipts for their own receipts quickly.
762766 #
763- # Then, after T+N, we flush out any receipts that have accumulated, and restart
764- # the timer to flush out more receipts at T+2N, etc. If no receipts accumulate,
765- # we stop the cycle and go back to the start.
767+ # For destinations that we should delay sending the receipt to, we queue
768+ # the receipts up to be sent in the next transaction, but don't trigger
769+ # a new transaction to be sent. We then add the destination to the
770+ # `DestinationWakeupQueue`, which will slowly iterate over each
771+ # destination and trigger a new transaction to be sent.
766772 #
767- # However, in practice, it is often possible to flush out receipts earlier: in
768- # particular, if we are sending a transaction to a given server anyway (for
769- # example, because we have a PDU or a RR in another room to send), then we may
770- # as well send out all of the pending RRs for that server. So it may be that
771- # by the time we get to T+N, we don't actually have any RRs left to send out.
772- # Nevertheless we continue to buffer up RRs for the room in question until we
773- # reach the point that no RRs arrive between timer ticks.
773+ # However, in practice, it is often possible to send out delayed
774+ # receipts earlier: in particular, if we are sending a transaction to a
775+ # given server anyway (for example, because we have a PDU or a RR in
776+ # another room to send), then we may as well send out all of the pending
777+ # RRs for that server. So it may be that by the time we get to waking up
778+ # the destination, we don't actually have any RRs left to send out.
774779 #
775- # For even more background, see https://github.com/matrix-org/synapse/issues/4730.
780+ # For even more background, see
781+ # https://github.com/matrix-org/synapse/issues/4730.
776782
777783 room_id = receipt .room_id
778784
785+ # Local read receipts always have 1 event ID.
786+ event_id = receipt .event_ids [0 ]
787+
779788 # Work out which remote servers should be poked and poke them.
780789 domains_set = await self ._storage_controllers .state .get_current_hosts_in_room_or_partial_state_approximation (
781790 room_id
@@ -797,49 +806,51 @@ async def send_read_receipt(self, receipt: ReadReceipt) -> None:
797806 if not domains :
798807 return
799808
800- queues_pending_flush = self ._queues_awaiting_rr_flush_by_room .get (room_id )
809+ # We now split which domains we want to wake up immediately vs which we
810+ # want to delay waking up.
811+ immediate_domains : StrCollection
812+ delay_domains : StrCollection
801813
802- # if there is no flush yet scheduled, we will send out these receipts with
803- # immediate flushes, and schedule the next flush for this room.
804- if queues_pending_flush is not None :
805- logger . debug ( "Queuing receipt for: %r" , domains )
814+ if len ( domains ) < 10 :
815+ # For "small" rooms send to all domains immediately
816+ immediate_domains = domains
817+ delay_domains = ( )
806818 else :
807- logger .debug ("Sending receipt to: %r" , domains )
808- self ._schedule_rr_flush_for_room (room_id , len (domains ))
819+ metadata = await self .store .get_metadata_for_event (
820+ receipt .room_id , event_id
821+ )
822+ assert metadata is not None
809823
810- for domain in domains :
811- queue = self ._get_per_destination_queue (domain )
812- queue .queue_read_receipt (receipt )
824+ sender_domain = get_domain_from_id (metadata .sender )
813825
814- # if there is already a RR flush pending for this room, then make sure this
815- # destination is registered for the flush
816- if queues_pending_flush is not None :
817- queues_pending_flush . add ( queue )
826+ if self . clock . time_msec () - metadata . received_ts < 60_000 :
827+ # We always send receipts for recent messages immediately
828+ immediate_domains = domains
829+ delay_domains = ( )
818830 else :
819- queue .flush_read_receipts_for_room (room_id )
820-
821- def _schedule_rr_flush_for_room (self , room_id : str , n_domains : int ) -> None :
822- # that is going to cause approximately len(domains) transactions, so now back
823- # off for that multiplied by RR_TXN_INTERVAL_PER_ROOM
824- backoff_ms = self ._rr_txn_interval_per_room_ms * n_domains
825-
826- logger .debug ("Scheduling RR flush in %s in %d ms" , room_id , backoff_ms )
827- self .clock .call_later (backoff_ms , self ._flush_rrs_for_room , room_id )
828- self ._queues_awaiting_rr_flush_by_room [room_id ] = set ()
829-
830- def _flush_rrs_for_room (self , room_id : str ) -> None :
831- queues = self ._queues_awaiting_rr_flush_by_room .pop (room_id )
832- logger .debug ("Flushing RRs in %s to %s" , room_id , queues )
833-
834- if not queues :
835- # no more RRs arrived for this room; we are done.
836- return
831+ # Otherwise, we delay waking up all destinations except for the
832+ # sender's domain.
833+ immediate_domains = []
834+ delay_domains = []
835+ for domain in domains :
836+ if domain == sender_domain :
837+ immediate_domains .append (domain )
838+ else :
839+ delay_domains .append (domain )
840+
841+ for domain in immediate_domains :
842+ # Add to destination queue and wake the destination up
843+ queue = self ._get_per_destination_queue (domain )
844+ queue .queue_read_receipt (receipt )
845+ queue .attempt_new_transaction ()
837846
838- # schedule the next flush
839- self ._schedule_rr_flush_for_room (room_id , len (queues ))
847+ for domain in delay_domains :
848+ # Add to destination queue...
849+ queue = self ._get_per_destination_queue (domain )
850+ queue .queue_read_receipt (receipt )
840851
841- for queue in queues :
842- queue . flush_read_receipts_for_room ( room_id )
852+ # ... and schedule the destination to be woken up.
853+ self . _destination_wakeup_queue . add_to_queue ( domain )
843854
844855 async def send_presence_to_destinations (
845856 self , states : Iterable [UserPresenceState ], destinations : Iterable [str ]
0 commit comments