Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 05e8c70

Browse files
Experimental Federation Speedup (#9702)
This speeds up federation by "squeezing" the paired database calls (to destinations and destination_rooms), which previously happened once per event, into a single paired call for an entire batch (100 max). Signed-off-by: Jonathan de Jong <[email protected]>
1 parent 00a6db9 commit 05e8c70

File tree

5 files changed

+129
-97
lines changed

5 files changed

+129
-97
lines changed

changelog.d/9702.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Speed up federation transmission by using fewer database calls. Contributed by @ShadowJonathan.

contrib/experiments/test_messaging.py

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -224,14 +224,16 @@ def send_message(self, room_name, sender, body):
224224
destinations = yield self.get_servers_for_context(room_name)
225225

226226
try:
227-
yield self.replication_layer.send_pdu(
228-
Pdu.create_new(
229-
context=room_name,
230-
pdu_type="sy.room.message",
231-
content={"sender": sender, "body": body},
232-
origin=self.server_name,
233-
destinations=destinations,
234-
)
227+
yield self.replication_layer.send_pdus(
228+
[
229+
Pdu.create_new(
230+
context=room_name,
231+
pdu_type="sy.room.message",
232+
content={"sender": sender, "body": body},
233+
origin=self.server_name,
234+
destinations=destinations,
235+
)
236+
]
235237
)
236238
except Exception as e:
237239
logger.exception(e)
@@ -253,7 +255,7 @@ def join_room(self, room_name, sender, joinee):
253255
origin=self.server_name,
254256
destinations=destinations,
255257
)
256-
yield self.replication_layer.send_pdu(pdu)
258+
yield self.replication_layer.send_pdus([pdu])
257259
except Exception as e:
258260
logger.exception(e)
259261

@@ -265,16 +267,18 @@ def invite_to_room(self, room_name, sender, invitee):
265267
destinations = yield self.get_servers_for_context(room_name)
266268

267269
try:
268-
yield self.replication_layer.send_pdu(
269-
Pdu.create_new(
270-
context=room_name,
271-
is_state=True,
272-
pdu_type="sy.room.member",
273-
state_key=invitee,
274-
content={"membership": "invite"},
275-
origin=self.server_name,
276-
destinations=destinations,
277-
)
270+
yield self.replication_layer.send_pdus(
271+
[
272+
Pdu.create_new(
273+
context=room_name,
274+
is_state=True,
275+
pdu_type="sy.room.member",
276+
state_key=invitee,
277+
content={"membership": "invite"},
278+
origin=self.server_name,
279+
destinations=destinations,
280+
)
281+
]
278282
)
279283
except Exception as e:
280284
logger.exception(e)

synapse/federation/sender/__init__.py

Lines changed: 84 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,22 @@
1818

1919
from prometheus_client import Counter
2020

21-
from twisted.internet import defer
22-
2321
import synapse.metrics
2422
from synapse.api.presence import UserPresenceState
2523
from synapse.events import EventBase
2624
from synapse.federation.sender.per_destination_queue import PerDestinationQueue
2725
from synapse.federation.sender.transaction_manager import TransactionManager
2826
from synapse.federation.units import Edu
2927
from synapse.handlers.presence import get_interested_remotes
30-
from synapse.logging.context import (
31-
make_deferred_yieldable,
32-
preserve_fn,
33-
run_in_background,
34-
)
28+
from synapse.logging.context import preserve_fn
3529
from synapse.metrics import (
3630
LaterGauge,
3731
event_processing_loop_counter,
3832
event_processing_loop_room_count,
3933
events_processed_counter,
4034
)
4135
from synapse.metrics.background_process_metrics import run_as_background_process
42-
from synapse.types import JsonDict, ReadReceipt, RoomStreamToken
36+
from synapse.types import Collection, JsonDict, ReadReceipt, RoomStreamToken
4337
from synapse.util.metrics import Measure, measure_func
4438

4539
if TYPE_CHECKING:
@@ -276,15 +270,27 @@ async def _process_event_queue_loop(self) -> None:
276270
if not events and next_token >= self._last_poked_id:
277271
break
278272

279-
async def handle_event(event: EventBase) -> None:
273+
async def get_destinations_for_event(
274+
event: EventBase,
275+
) -> Collection[str]:
276+
"""Computes the destinations to which this event must be sent.
277+
278+
This returns an empty tuple when there are no destinations to send to,
279+
or if this event is not from this homeserver and we are not sending
280+
it on behalf of another server.
281+
282+
Will also filter out destinations which this sender is not responsible for,
283+
if multiple federation senders exist.
284+
"""
285+
280286
# Only send events for this server.
281287
send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of()
282288
is_mine = self.is_mine_id(event.sender)
283289
if not is_mine and send_on_behalf_of is None:
284-
return
290+
return ()
285291

286292
if not event.internal_metadata.should_proactively_send():
287-
return
293+
return ()
288294

289295
destinations = None # type: Optional[Set[str]]
290296
if not event.prev_event_ids():
@@ -319,7 +325,7 @@ async def handle_event(event: EventBase) -> None:
319325
"Failed to calculate hosts in room for event: %s",
320326
event.event_id,
321327
)
322-
return
328+
return ()
323329

324330
destinations = {
325331
d
@@ -329,42 +335,45 @@ async def handle_event(event: EventBase) -> None:
329335
)
330336
}
331337

338+
destinations.discard(self.server_name)
339+
332340
if send_on_behalf_of is not None:
333341
# If we are sending the event on behalf of another server
334342
# then it already has the event and there is no reason to
335343
# send the event to it.
336344
destinations.discard(send_on_behalf_of)
337345

338-
logger.debug("Sending %s to %r", event, destinations)
339-
340346
if destinations:
341-
await self._send_pdu(event, destinations)
342-
343347
now = self.clock.time_msec()
344348
ts = await self.store.get_received_ts(event.event_id)
345349

346350
synapse.metrics.event_processing_lag_by_event.labels(
347351
"federation_sender"
348352
).observe((now - ts) / 1000)
349353

350-
async def handle_room_events(events: Iterable[EventBase]) -> None:
351-
with Measure(self.clock, "handle_room_events"):
352-
for event in events:
353-
await handle_event(event)
354-
355-
events_by_room = {} # type: Dict[str, List[EventBase]]
356-
for event in events:
357-
events_by_room.setdefault(event.room_id, []).append(event)
358-
359-
await make_deferred_yieldable(
360-
defer.gatherResults(
361-
[
362-
run_in_background(handle_room_events, evs)
363-
for evs in events_by_room.values()
364-
],
365-
consumeErrors=True,
366-
)
367-
)
354+
return destinations
355+
return ()
356+
357+
async def get_federatable_events_and_destinations(
358+
events: Iterable[EventBase],
359+
) -> List[Tuple[EventBase, Collection[str]]]:
360+
with Measure(self.clock, "get_destinations_for_events"):
361+
# Fetch federation destinations per event,
362+
# skip if get_destinations_for_event returns an empty collection,
363+
# return list of event->destinations pairs.
364+
return [
365+
(event, dests)
366+
for (event, dests) in [
367+
(event, await get_destinations_for_event(event))
368+
for event in events
369+
]
370+
if dests
371+
]
372+
373+
events_and_dests = await get_federatable_events_and_destinations(events)
374+
375+
# Send corresponding events to each destination queue
376+
await self._distribute_events(events_and_dests)
368377

369378
await self.store.update_federation_out_pos("events", next_token)
370379

@@ -382,7 +391,7 @@ async def handle_room_events(events: Iterable[EventBase]) -> None:
382391
events_processed_counter.inc(len(events))
383392

384393
event_processing_loop_room_count.labels("federation_sender").inc(
385-
len(events_by_room)
394+
len({event.room_id for event in events})
386395
)
387396

388397
event_processing_loop_counter.labels("federation_sender").inc()
@@ -394,34 +403,53 @@ async def handle_room_events(events: Iterable[EventBase]) -> None:
394403
finally:
395404
self._is_processing = False
396405

397-
async def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None:
398-
# We loop through all destinations to see whether we already have
399-
# a transaction in progress. If we do, stick it in the pending_pdus
400-
# table and we'll get back to it later.
406+
async def _distribute_events(
407+
self,
408+
events_and_dests: Iterable[Tuple[EventBase, Collection[str]]],
409+
) -> None:
410+
"""Distribute events to the respective per_destination queues.
401411
402-
destinations = set(destinations)
403-
destinations.discard(self.server_name)
404-
logger.debug("Sending to: %s", str(destinations))
412+
Also persists last-seen per-room stream_ordering to 'destination_rooms'.
405413
406-
if not destinations:
407-
return
414+
Args:
415+
events_and_dests: A list of tuples, which are (event: EventBase, destinations: Collection[str]).
416+
Every event is paired with its intended destinations (in federation).
417+
"""
418+
# Tuples of room_id + destination to their max-seen stream_ordering
419+
room_with_dest_stream_ordering = {} # type: Dict[Tuple[str, str], int]
408420

409-
sent_pdus_destination_dist_total.inc(len(destinations))
410-
sent_pdus_destination_dist_count.inc()
421+
# List of events to send to each destination
422+
events_by_dest = {} # type: Dict[str, List[EventBase]]
411423

412-
assert pdu.internal_metadata.stream_ordering
424+
# For each event-destinations pair...
425+
for event, destinations in events_and_dests:
413426

414-
# track the fact that we have a PDU for these destinations,
415-
# to allow us to perform catch-up later on if the remote is unreachable
416-
# for a while.
417-
await self.store.store_destination_rooms_entries(
418-
destinations,
419-
pdu.room_id,
420-
pdu.internal_metadata.stream_ordering,
427+
# (we got this from the database, it's filled)
428+
assert event.internal_metadata.stream_ordering
429+
430+
sent_pdus_destination_dist_total.inc(len(destinations))
431+
sent_pdus_destination_dist_count.inc()
432+
433+
# ...iterate over those destinations...
434+
for destination in destinations:
435+
# ...update their stream-ordering...
436+
room_with_dest_stream_ordering[(event.room_id, destination)] = max(
437+
event.internal_metadata.stream_ordering,
438+
room_with_dest_stream_ordering.get((event.room_id, destination), 0),
439+
)
440+
441+
# ...and add the event to each destination queue.
442+
events_by_dest.setdefault(destination, []).append(event)
443+
444+
# Bulk-store destination_rooms stream_ids
445+
await self.store.bulk_store_destination_rooms_entries(
446+
room_with_dest_stream_ordering
421447
)
422448

423-
for destination in destinations:
424-
self._get_per_destination_queue(destination).send_pdu(pdu)
449+
for destination, pdus in events_by_dest.items():
450+
logger.debug("Sending %d pdus to %s", len(pdus), destination)
451+
452+
self._get_per_destination_queue(destination).send_pdus(pdus)
425453

426454
async def send_read_receipt(self, receipt: ReadReceipt) -> None:
427455
"""Send a RR to any other servers in the room

synapse/federation/sender/per_destination_queue.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -154,19 +154,22 @@ def pending_edu_count(self) -> int:
154154
+ len(self._pending_edus_keyed)
155155
)
156156

157-
def send_pdu(self, pdu: EventBase) -> None:
158-
"""Add a PDU to the queue, and start the transmission loop if necessary
157+
def send_pdus(self, pdus: Iterable[EventBase]) -> None:
158+
"""Add PDUs to the queue, and start the transmission loop if necessary
159159
160160
Args:
161-
pdu: pdu to send
161+
pdus: pdus to send
162162
"""
163163
if not self._catching_up or self._last_successful_stream_ordering is None:
164164
# only enqueue the PDUs if we are not catching up (False) or do not
165165
# yet know if we have anything to catch up (None)
166-
self._pending_pdus.append(pdu)
166+
self._pending_pdus.extend(pdus)
167167
else:
168-
assert pdu.internal_metadata.stream_ordering
169-
self._catchup_last_skipped = pdu.internal_metadata.stream_ordering
168+
self._catchup_last_skipped = max(
169+
pdu.internal_metadata.stream_ordering
170+
for pdu in pdus
171+
if pdu.internal_metadata.stream_ordering is not None
172+
)
170173

171174
self.attempt_new_transaction()
172175

synapse/storage/databases/main/transactions.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import logging
1616
from collections import namedtuple
17-
from typing import Iterable, List, Optional, Tuple
17+
from typing import Dict, List, Optional, Tuple
1818

1919
from canonicaljson import encode_canonical_json
2020

@@ -295,37 +295,33 @@ def _set_destination_retry_timings_emulated(
295295
},
296296
)
297297

298-
async def store_destination_rooms_entries(
299-
self,
300-
destinations: Iterable[str],
301-
room_id: str,
302-
stream_ordering: int,
303-
) -> None:
298+
async def bulk_store_destination_rooms_entries(
299+
self, room_and_destination_to_ordering: Dict[Tuple[str, str], int]
300+
):
304301
"""
305-
Updates or creates `destination_rooms` entries in batch for a single event.
302+
Updates or creates `destination_rooms` entries for a number of events.
306303
307304
Args:
308-
destinations: list of destinations
309-
room_id: the room_id of the event
310-
stream_ordering: the stream_ordering of the event
305+
room_and_destination_to_ordering: A mapping of (room, destination) -> stream_id
311306
"""
312307

313308
await self.db_pool.simple_upsert_many(
314309
table="destinations",
315310
key_names=("destination",),
316-
key_values=[(d,) for d in destinations],
311+
key_values={(d,) for _, d in room_and_destination_to_ordering.keys()},
317312
value_names=[],
318313
value_values=[],
319314
desc="store_destination_rooms_entries_dests",
320315
)
321316

322-
rows = [(destination, room_id) for destination in destinations]
323317
await self.db_pool.simple_upsert_many(
324318
table="destination_rooms",
325-
key_names=("destination", "room_id"),
326-
key_values=rows,
319+
key_names=("room_id", "destination"),
320+
key_values=list(room_and_destination_to_ordering.keys()),
327321
value_names=["stream_ordering"],
328-
value_values=[(stream_ordering,)] * len(rows),
322+
value_values=[
323+
(stream_id,) for stream_id in room_and_destination_to_ordering.values()
324+
],
329325
desc="store_destination_rooms_entries_rooms",
330326
)
331327

0 commit comments

Comments
 (0)