Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 43adf25

Browse files
authored
Refactor presence so we can prune user in room caches (#13313)
See #10826 and #10786 for context as to why we had to disable pruning on those caches. Now that `get_users_who_share_room_with_user` is called frequently only for presence, we just need to make calls to it less frequent and then we can remove the various levels of caching that is going on.
1 parent 357561c commit 43adf25

File tree

4 files changed

+109
-91
lines changed

4 files changed

+109
-91
lines changed

changelog.d/13313.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Change `get_users_in_room` and `get_rooms_for_user` caches to enable pruning of old entries.

synapse/handlers/presence.py

Lines changed: 36 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,13 @@
3434
Callable,
3535
Collection,
3636
Dict,
37-
FrozenSet,
3837
Generator,
3938
Iterable,
4039
List,
4140
Optional,
4241
Set,
4342
Tuple,
4443
Type,
45-
Union,
4644
)
4745

4846
from prometheus_client import Counter
@@ -68,7 +66,6 @@
6866
from synapse.streams import EventSource
6967
from synapse.types import JsonDict, StreamKeyType, UserID, get_domain_from_id
7068
from synapse.util.async_helpers import Linearizer
71-
from synapse.util.caches.descriptors import _CacheContext, cached
7269
from synapse.util.metrics import Measure
7370
from synapse.util.wheel_timer import WheelTimer
7471

@@ -1656,15 +1653,18 @@ async def get_new_events(
16561653
# doesn't return. C.f. #5503.
16571654
return [], max_token
16581655

1659-
# Figure out which other users this user should receive updates for
1660-
users_interested_in = await self._get_interested_in(user, explicit_room_id)
1656+
# Figure out which other users this user should explicitly receive
1657+
# updates for
1658+
additional_users_interested_in = (
1659+
await self.get_presence_router().get_interested_users(user.to_string())
1660+
)
16611661

16621662
# We have a set of users that we're interested in the presence of. We want to
16631663
# cross-reference that with the users that have actually changed their presence.
16641664

16651665
# Check whether this user should see all user updates
16661666

1667-
if users_interested_in == PresenceRouter.ALL_USERS:
1667+
if additional_users_interested_in == PresenceRouter.ALL_USERS:
16681668
# Provide presence state for all users
16691669
presence_updates = await self._filter_all_presence_updates_for_user(
16701670
user_id, include_offline, from_key
@@ -1673,34 +1673,47 @@ async def get_new_events(
16731673
return presence_updates, max_token
16741674

16751675
# Make mypy happy. users_interested_in should now be a set
1676-
assert not isinstance(users_interested_in, str)
1676+
assert not isinstance(additional_users_interested_in, str)
1677+
1678+
# We always care about our own presence.
1679+
additional_users_interested_in.add(user_id)
1680+
1681+
if explicit_room_id:
1682+
user_ids = await self.store.get_users_in_room(explicit_room_id)
1683+
additional_users_interested_in.update(user_ids)
16771684

16781685
# The set of users that we're interested in and that have had a presence update.
16791686
# We'll actually pull the presence updates for these users at the end.
1680-
interested_and_updated_users: Union[Set[str], FrozenSet[str]] = set()
1687+
interested_and_updated_users: Collection[str]
16811688

16821689
if from_key is not None:
16831690
# First get all users that have had a presence update
16841691
updated_users = stream_change_cache.get_all_entities_changed(from_key)
16851692

16861693
# Cross-reference users we're interested in with those that have had updates.
1687-
# Use a slightly-optimised method for processing smaller sets of updates.
1688-
if updated_users is not None and len(updated_users) < 500:
1689-
# For small deltas, it's quicker to get all changes and then
1690-
# cross-reference with the users we're interested in
1694+
if updated_users is not None:
1695+
# If we have the full list of changes for presence we can
1696+
# simply check which ones share a room with the user.
16911697
get_updates_counter.labels("stream").inc()
1692-
for other_user_id in updated_users:
1693-
if other_user_id in users_interested_in:
1694-
# mypy thinks this variable could be a FrozenSet as it's possibly set
1695-
# to one in the `get_entities_changed` call below, and `add()` is not
1696-
# method on a FrozenSet. That doesn't affect us here though, as
1697-
# `interested_and_updated_users` is clearly a set() above.
1698-
interested_and_updated_users.add(other_user_id) # type: ignore
1698+
1699+
sharing_users = await self.store.do_users_share_a_room(
1700+
user_id, updated_users
1701+
)
1702+
1703+
interested_and_updated_users = (
1704+
sharing_users.union(additional_users_interested_in)
1705+
).intersection(updated_users)
1706+
16991707
else:
17001708
# Too many possible updates. Find all users we can see and check
17011709
# if any of them have changed.
17021710
get_updates_counter.labels("full").inc()
17031711

1712+
users_interested_in = (
1713+
await self.store.get_users_who_share_room_with_user(user_id)
1714+
)
1715+
users_interested_in.update(additional_users_interested_in)
1716+
17041717
interested_and_updated_users = (
17051718
stream_change_cache.get_entities_changed(
17061719
users_interested_in, from_key
@@ -1709,7 +1722,10 @@ async def get_new_events(
17091722
else:
17101723
# No from_key has been specified. Return the presence for all users
17111724
# this user is interested in
1712-
interested_and_updated_users = users_interested_in
1725+
interested_and_updated_users = (
1726+
await self.store.get_users_who_share_room_with_user(user_id)
1727+
)
1728+
interested_and_updated_users.update(additional_users_interested_in)
17131729

17141730
# Retrieve the current presence state for each user
17151731
users_to_state = await self.get_presence_handler().current_state_for_users(
@@ -1804,62 +1820,6 @@ def _filter_offline_presence_state(
18041820
def get_current_key(self) -> int:
18051821
return self.store.get_current_presence_token()
18061822

1807-
@cached(num_args=2, cache_context=True)
1808-
async def _get_interested_in(
1809-
self,
1810-
user: UserID,
1811-
explicit_room_id: Optional[str] = None,
1812-
cache_context: Optional[_CacheContext] = None,
1813-
) -> Union[Set[str], str]:
1814-
"""Returns the set of users that the given user should see presence
1815-
updates for.
1816-
1817-
Args:
1818-
user: The user to retrieve presence updates for.
1819-
explicit_room_id: The users that are in the room will be returned.
1820-
1821-
Returns:
1822-
A set of user IDs to return presence updates for, or "ALL" to return all
1823-
known updates.
1824-
"""
1825-
user_id = user.to_string()
1826-
users_interested_in = set()
1827-
users_interested_in.add(user_id) # So that we receive our own presence
1828-
1829-
# cache_context isn't likely to ever be None due to the @cached decorator,
1830-
# but we can't have a non-optional argument after the optional argument
1831-
# explicit_room_id either. Assert cache_context is not None so we can use it
1832-
# without mypy complaining.
1833-
assert cache_context
1834-
1835-
# Check with the presence router whether we should poll additional users for
1836-
# their presence information
1837-
additional_users = await self.get_presence_router().get_interested_users(
1838-
user.to_string()
1839-
)
1840-
if additional_users == PresenceRouter.ALL_USERS:
1841-
# If the module requested that this user see the presence updates of *all*
1842-
# users, then simply return that instead of calculating what rooms this
1843-
# user shares
1844-
return PresenceRouter.ALL_USERS
1845-
1846-
# Add the additional users from the router
1847-
users_interested_in.update(additional_users)
1848-
1849-
# Find the users who share a room with this user
1850-
users_who_share_room = await self.store.get_users_who_share_room_with_user(
1851-
user_id, on_invalidate=cache_context.invalidate
1852-
)
1853-
users_interested_in.update(users_who_share_room)
1854-
1855-
if explicit_room_id:
1856-
user_ids = await self.store.get_users_in_room(
1857-
explicit_room_id, on_invalidate=cache_context.invalidate
1858-
)
1859-
users_interested_in.update(user_ids)
1860-
1861-
return users_interested_in
1862-
18631823

18641824
def handle_timeouts(
18651825
user_states: List[UserPresenceState],

synapse/storage/_base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ def _invalidate_state_caches(
8080
)
8181
self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))
8282

83+
# There's no easy way of invalidating this cache for just the users
84+
# that have changed, so we just clear the entire thing.
85+
self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None)
86+
8387
for user_id in members_changed:
8488
self._attempt_to_invalidate_cache(
8589
"get_user_in_room_with_profile", (room_id, user_id)

synapse/storage/databases/main/roommember.py

Lines changed: 68 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
FrozenSet,
2222
Iterable,
2323
List,
24+
Mapping,
2425
Optional,
2526
Set,
2627
Tuple,
@@ -55,6 +56,7 @@
5556
from synapse.util.async_helpers import Linearizer
5657
from synapse.util.caches import intern_string
5758
from synapse.util.caches.descriptors import _CacheContext, cached, cachedList
59+
from synapse.util.iterutils import batch_iter
5860
from synapse.util.metrics import Measure
5961

6062
if TYPE_CHECKING:
@@ -183,7 +185,7 @@ def _check_safe_current_state_events_membership_updated_txn(
183185
self._check_safe_current_state_events_membership_updated_txn,
184186
)
185187

186-
@cached(max_entries=100000, iterable=True, prune_unread_entries=False)
188+
@cached(max_entries=100000, iterable=True)
187189
async def get_users_in_room(self, room_id: str) -> List[str]:
188190
return await self.db_pool.runInteraction(
189191
"get_users_in_room", self.get_users_in_room_txn, room_id
@@ -561,7 +563,7 @@ async def get_local_current_membership_for_user_in_room(
561563

562564
return results_dict.get("membership"), results_dict.get("event_id")
563565

564-
@cached(max_entries=500000, iterable=True, prune_unread_entries=False)
566+
@cached(max_entries=500000, iterable=True)
565567
async def get_rooms_for_user_with_stream_ordering(
566568
self, user_id: str
567569
) -> FrozenSet[GetRoomsForUserWithStreamOrdering]:
@@ -732,25 +734,76 @@ async def get_rooms_for_user(
732734
)
733735
return frozenset(r.room_id for r in rooms)
734736

735-
@cached(
736-
max_entries=500000,
737-
cache_context=True,
738-
iterable=True,
739-
prune_unread_entries=False,
737+
@cached(max_entries=10000)
738+
async def does_pair_of_users_share_a_room(
739+
self, user_id: str, other_user_id: str
740+
) -> bool:
741+
raise NotImplementedError()
742+
743+
@cachedList(
744+
cached_method_name="does_pair_of_users_share_a_room", list_name="other_user_ids"
740745
)
741-
async def get_users_who_share_room_with_user(
742-
self, user_id: str, cache_context: _CacheContext
746+
async def _do_users_share_a_room(
747+
self, user_id: str, other_user_ids: Collection[str]
748+
) -> Mapping[str, Optional[bool]]:
749+
"""Return mapping from user ID to whether they share a room with the
750+
given user.
751+
752+
Note: `None` and `False` are equivalent and mean they don't share a
753+
room.
754+
"""
755+
756+
def do_users_share_a_room_txn(
757+
txn: LoggingTransaction, user_ids: Collection[str]
758+
) -> Dict[str, bool]:
759+
clause, args = make_in_list_sql_clause(
760+
self.database_engine, "state_key", user_ids
761+
)
762+
763+
# This query works by fetching both the list of rooms for the target
764+
# user and the set of other users, and then checking if there is any
765+
# overlap.
766+
sql = f"""
767+
SELECT b.state_key
768+
FROM (
769+
SELECT room_id FROM current_state_events
770+
WHERE type = 'm.room.member' AND membership = 'join' AND state_key = ?
771+
) AS a
772+
INNER JOIN (
773+
SELECT room_id, state_key FROM current_state_events
774+
WHERE type = 'm.room.member' AND membership = 'join' AND {clause}
775+
) AS b using (room_id)
776+
LIMIT 1
777+
"""
778+
779+
txn.execute(sql, (user_id, *args))
780+
return {u: True for u, in txn}
781+
782+
to_return = {}
783+
for batch_user_ids in batch_iter(other_user_ids, 1000):
784+
res = await self.db_pool.runInteraction(
785+
"do_users_share_a_room", do_users_share_a_room_txn, batch_user_ids
786+
)
787+
to_return.update(res)
788+
789+
return to_return
790+
791+
async def do_users_share_a_room(
792+
self, user_id: str, other_user_ids: Collection[str]
743793
) -> Set[str]:
794+
"""Return the set of users who share a room with the first users"""
795+
796+
user_dict = await self._do_users_share_a_room(user_id, other_user_ids)
797+
798+
return {u for u, share_room in user_dict.items() if share_room}
799+
800+
async def get_users_who_share_room_with_user(self, user_id: str) -> Set[str]:
744801
"""Returns the set of users who share a room with `user_id`"""
745-
room_ids = await self.get_rooms_for_user(
746-
user_id, on_invalidate=cache_context.invalidate
747-
)
802+
room_ids = await self.get_rooms_for_user(user_id)
748803

749804
user_who_share_room = set()
750805
for room_id in room_ids:
751-
user_ids = await self.get_users_in_room(
752-
room_id, on_invalidate=cache_context.invalidate
753-
)
806+
user_ids = await self.get_users_in_room(room_id)
754807
user_who_share_room.update(user_ids)
755808

756809
return user_who_share_room

0 commit comments

Comments
 (0)