Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 828b550

Browse files
Remove _get_events_cache check optimisation from _have_seen_events_dict (#14161)
1 parent 2c63cdc commit 828b550

File tree

3 files changed

+14
-30
lines changed

3 files changed

+14
-30
lines changed

changelog.d/14161.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a bug introduced in 1.30.0 where purging and rejoining a room without restarting in-between would result in a broken room.

synapse/storage/databases/main/events_worker.py

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1502,38 +1502,33 @@ async def _have_seen_events_dict(
15021502
Returns:
15031503
a dict {event_id -> bool}
15041504
"""
1505-
# if the event cache contains the event, obviously we've seen it.
1506-
1507-
cache_results = {
1508-
event_id
1509-
for event_id in event_ids
1510-
if await self._get_event_cache.contains((event_id,))
1511-
}
1512-
results = dict.fromkeys(cache_results, True)
1513-
remaining = [
1514-
event_id for event_id in event_ids if event_id not in cache_results
1515-
]
1516-
if not remaining:
1517-
return results
1505+
# TODO: We used to query the _get_event_cache here as a fast-path before
1506+
# hitting the database. If an event was in the cache, we had presumably
1507+
# seen it before.
1508+
#
1509+
# But this is currently an invalid assumption due to the _get_event_cache
1510+
# not being invalidated when purging events from a room. The optimisation can
1511+
# be re-added after https://github.com/matrix-org/synapse/issues/13476
15181512

1519-
def have_seen_events_txn(txn: LoggingTransaction) -> None:
1513+
def have_seen_events_txn(txn: LoggingTransaction) -> Dict[str, bool]:
15201514
# we deliberately do *not* query the database for room_id, to make the
15211515
# query an index-only lookup on `events_event_id_key`.
15221516
#
15231517
# We therefore pull the events from the database into a set...
15241518

15251519
sql = "SELECT event_id FROM events AS e WHERE "
15261520
clause, args = make_in_list_sql_clause(
1527-
txn.database_engine, "e.event_id", remaining
1521+
txn.database_engine, "e.event_id", event_ids
15281522
)
15291523
txn.execute(sql + clause, args)
15301524
found_events = {eid for eid, in txn}
15311525

15321526
# ... and then we can update the results for each key
1533-
results.update({eid: (eid in found_events) for eid in remaining})
1527+
return {eid: (eid in found_events) for eid in event_ids}
15341528

1535-
await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn)
1536-
return results
1529+
return await self.db_pool.runInteraction(
1530+
"have_seen_events", have_seen_events_txn
1531+
)
15371532

15381533
@cached(max_entries=100000, tree=True)
15391534
async def have_seen_event(self, room_id: str, event_id: str) -> bool:

tests/storage/databases/main/test_events_worker.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -90,18 +90,6 @@ def test_simple(self):
9090
self.assertEqual(res, {self.event_ids[0]})
9191
self.assertEqual(ctx.get_resource_usage().db_txn_count, 0)
9292

93-
def test_query_via_event_cache(self):
94-
# fetch an event into the event cache
95-
self.get_success(self.store.get_event(self.event_ids[0]))
96-
97-
# looking it up should now cause no db hits
98-
with LoggingContext(name="test") as ctx:
99-
res = self.get_success(
100-
self.store.have_seen_events(self.room_id, [self.event_ids[0]])
101-
)
102-
self.assertEqual(res, {self.event_ids[0]})
103-
self.assertEqual(ctx.get_resource_usage().db_txn_count, 0)
104-
10593
def test_persisting_event_invalidates_cache(self):
10694
"""
10795
Test to make sure that the `have_seen_event` cache

0 commit comments

Comments
 (0)