Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 5e7847d

Browse files
authored
Cache user IDs instead of profile objects (#13573)
The profile objects are never used and increase cache size significantly.
1 parent 37f329c commit 5e7847d

File tree

5 files changed

+57
-54
lines changed

5 files changed

+57
-54
lines changed

changelog.d/13573.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Cache user IDs instead of profiles to reduce cache memory usage. Contributed by Nick @ Beeper (@fizzadar).

synapse/handlers/sync.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2421,10 +2421,10 @@ async def get_rooms_for_user_at(
24212421
joined_room.room_id, joined_room.event_pos.stream
24222422
)
24232423
)
2424-
users_in_room = await self.state.get_current_users_in_room(
2424+
user_ids_in_room = await self.state.get_current_user_ids_in_room(
24252425
joined_room.room_id, extrems
24262426
)
2427-
if user_id in users_in_room:
2427+
if user_id in user_ids_in_room:
24282428
joined_room_ids.add(joined_room.room_id)
24292429

24302430
return frozenset(joined_room_ids)

synapse/state/__init__.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet
4545
from synapse.state import v1, v2
4646
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
47-
from synapse.storage.roommember import ProfileInfo
4847
from synapse.storage.state import StateFilter
4948
from synapse.types import StateMap
5049
from synapse.util.async_helpers import Linearizer
@@ -210,11 +209,11 @@ async def compute_state_after_events(
210209
ret = await self.resolve_state_groups_for_events(room_id, event_ids)
211210
return await ret.get_state(self._state_storage_controller, state_filter)
212211

213-
async def get_current_users_in_room(
212+
async def get_current_user_ids_in_room(
214213
self, room_id: str, latest_event_ids: List[str]
215-
) -> Dict[str, ProfileInfo]:
214+
) -> Set[str]:
216215
"""
217-
Get the users who are currently in a room.
216+
Get the users IDs who are currently in a room.
218217
219218
Note: This is much slower than using the equivalent method
220219
`DataStore.get_users_in_room` or `DataStore.get_users_in_room_with_profiles`,
@@ -225,15 +224,15 @@ async def get_current_users_in_room(
225224
room_id: The ID of the room.
226225
latest_event_ids: Precomputed list of latest event IDs. Will be computed if None.
227226
Returns:
228-
Dictionary of user IDs to their profileinfo.
227+
Set of user IDs in the room.
229228
"""
230229

231230
assert latest_event_ids is not None
232231

233-
logger.debug("calling resolve_state_groups from get_current_users_in_room")
232+
logger.debug("calling resolve_state_groups from get_current_user_ids_in_room")
234233
entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids)
235234
state = await entry.get_state(self._state_storage_controller, StateFilter.all())
236-
return await self.store.get_joined_users_from_state(room_id, state, entry)
235+
return await self.store.get_joined_user_ids_from_state(room_id, state, entry)
237236

238237
async def get_hosts_in_room_at_events(
239238
self, room_id: str, event_ids: Collection[str]

synapse/storage/databases/main/roommember.py

Lines changed: 29 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -835,9 +835,9 @@ async def get_mutual_rooms_between_users(
835835

836836
return shared_room_ids or frozenset()
837837

838-
async def get_joined_users_from_state(
838+
async def get_joined_user_ids_from_state(
839839
self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry"
840-
) -> Dict[str, ProfileInfo]:
840+
) -> Set[str]:
841841
state_group: Union[object, int] = state_entry.state_group
842842
if not state_group:
843843
# If state_group is None it means it has yet to be assigned a
@@ -848,25 +848,25 @@ async def get_joined_users_from_state(
848848

849849
assert state_group is not None
850850
with Measure(self._clock, "get_joined_users_from_state"):
851-
return await self._get_joined_users_from_context(
851+
return await self._get_joined_user_ids_from_context(
852852
room_id, state_group, state, context=state_entry
853853
)
854854

855855
@cached(num_args=2, iterable=True, max_entries=100000)
856-
async def _get_joined_users_from_context(
856+
async def _get_joined_user_ids_from_context(
857857
self,
858858
room_id: str,
859859
state_group: Union[object, int],
860860
current_state_ids: StateMap[str],
861861
event: Optional[EventBase] = None,
862862
context: Optional["_StateCacheEntry"] = None,
863-
) -> Dict[str, ProfileInfo]:
863+
) -> Set[str]:
864864
# We don't use `state_group`, it's there so that we can cache based
865865
# on it. However, it's important that it's never None, since two current_states
866866
# with a state_group of None are likely to be different.
867867
assert state_group is not None
868868

869-
users_in_room = {}
869+
users_in_room = set()
870870
member_event_ids = [
871871
e_id
872872
for key, e_id in current_state_ids.items()
@@ -879,19 +879,19 @@ async def _get_joined_users_from_context(
879879
# If we do then we can reuse that result and simply update it with
880880
# any membership changes in `delta_ids`
881881
if context.prev_group and context.delta_ids:
882-
prev_res = self._get_joined_users_from_context.cache.get_immediate(
882+
prev_res = self._get_joined_user_ids_from_context.cache.get_immediate(
883883
(room_id, context.prev_group), None
884884
)
885-
if prev_res and isinstance(prev_res, dict):
886-
users_in_room = dict(prev_res)
885+
if prev_res and isinstance(prev_res, set):
886+
users_in_room = prev_res
887887
member_event_ids = [
888888
e_id
889889
for key, e_id in context.delta_ids.items()
890890
if key[0] == EventTypes.Member
891891
]
892892
for etype, state_key in context.delta_ids:
893893
if etype == EventTypes.Member:
894-
users_in_room.pop(state_key, None)
894+
users_in_room.discard(state_key)
895895

896896
# We check if we have any of the member event ids in the event cache
897897
# before we ask the DB
@@ -908,42 +908,41 @@ async def _get_joined_users_from_context(
908908
ev_entry = event_map.get(event_id)
909909
if ev_entry and not ev_entry.event.rejected_reason:
910910
if ev_entry.event.membership == Membership.JOIN:
911-
users_in_room[ev_entry.event.state_key] = ProfileInfo(
912-
display_name=ev_entry.event.content.get("displayname", None),
913-
avatar_url=ev_entry.event.content.get("avatar_url", None),
914-
)
911+
users_in_room.add(ev_entry.event.state_key)
915912
else:
916913
missing_member_event_ids.append(event_id)
917914

918915
if missing_member_event_ids:
919-
event_to_memberships = await self._get_joined_profiles_from_event_ids(
916+
event_to_memberships = await self._get_user_ids_from_membership_event_ids(
920917
missing_member_event_ids
921918
)
922-
users_in_room.update(row for row in event_to_memberships.values() if row)
919+
users_in_room.update(event_to_memberships.values())
923920

924921
if event is not None and event.type == EventTypes.Member:
925922
if event.membership == Membership.JOIN:
926923
if event.event_id in member_event_ids:
927-
users_in_room[event.state_key] = ProfileInfo(
928-
display_name=event.content.get("displayname", None),
929-
avatar_url=event.content.get("avatar_url", None),
930-
)
924+
users_in_room.add(event.state_key)
931925

932926
return users_in_room
933927

934-
@cached(max_entries=10000)
935-
def _get_joined_profile_from_event_id(
928+
@cached(
929+
max_entries=10000,
930+
# This name matches the old function that has been replaced - the cache name
931+
# is kept here to maintain backwards compatibility.
932+
name="_get_joined_profile_from_event_id",
933+
)
934+
def _get_user_id_from_membership_event_id(
936935
self, event_id: str
937936
) -> Optional[Tuple[str, ProfileInfo]]:
938937
raise NotImplementedError()
939938

940939
@cachedList(
941-
cached_method_name="_get_joined_profile_from_event_id",
940+
cached_method_name="_get_user_id_from_membership_event_id",
942941
list_name="event_ids",
943942
)
944-
async def _get_joined_profiles_from_event_ids(
943+
async def _get_user_ids_from_membership_event_ids(
945944
self, event_ids: Iterable[str]
946-
) -> Dict[str, Optional[Tuple[str, ProfileInfo]]]:
945+
) -> Dict[str, str]:
947946
"""For given set of member event_ids check if they point to a join
948947
event and if so return the associated user and profile info.
949948
@@ -958,21 +957,13 @@ async def _get_joined_profiles_from_event_ids(
958957
table="room_memberships",
959958
column="event_id",
960959
iterable=event_ids,
961-
retcols=("user_id", "display_name", "avatar_url", "event_id"),
960+
retcols=("user_id", "event_id"),
962961
keyvalues={"membership": Membership.JOIN},
963962
batch_size=1000,
964-
desc="_get_joined_profiles_from_event_ids",
963+
desc="_get_user_ids_from_membership_event_ids",
965964
)
966965

967-
return {
968-
row["event_id"]: (
969-
row["user_id"],
970-
ProfileInfo(
971-
avatar_url=row["avatar_url"], display_name=row["display_name"]
972-
),
973-
)
974-
for row in rows
975-
}
966+
return {row["event_id"]: row["user_id"] for row in rows}
976967

977968
@cached(max_entries=10000)
978969
async def is_host_joined(self, room_id: str, host: str) -> bool:
@@ -1131,12 +1122,12 @@ async def _get_joined_hosts(
11311122
else:
11321123
# The cache doesn't match the state group or prev state group,
11331124
# so we calculate the result from first principles.
1134-
joined_users = await self.get_joined_users_from_state(
1125+
joined_user_ids = await self.get_joined_user_ids_from_state(
11351126
room_id, state, state_entry
11361127
)
11371128

11381129
cache.hosts_to_joined_users = {}
1139-
for user_id in joined_users:
1130+
for user_id in joined_user_ids:
11401131
host = intern_string(get_domain_from_id(user_id))
11411132
cache.hosts_to_joined_users.setdefault(host, set()).add(user_id)
11421133

synapse/util/caches/descriptors.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,10 @@ def __init__(
7373
num_args: Optional[int],
7474
uncached_args: Optional[Collection[str]] = None,
7575
cache_context: bool = False,
76+
name: Optional[str] = None,
7677
):
7778
self.orig = orig
79+
self.name = name or orig.__name__
7880

7981
arg_spec = inspect.getfullargspec(orig)
8082
all_args = arg_spec.args
@@ -211,7 +213,7 @@ def __init__(
211213

212214
def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]:
213215
cache: LruCache[CacheKey, Any] = LruCache(
214-
cache_name=self.orig.__name__,
216+
cache_name=self.name,
215217
max_size=self.max_entries,
216218
)
217219

@@ -241,7 +243,7 @@ def _wrapped(*args: Any, **kwargs: Any) -> Any:
241243

242244
wrapped = cast(_CachedFunction, _wrapped)
243245
wrapped.cache = cache
244-
obj.__dict__[self.orig.__name__] = wrapped
246+
obj.__dict__[self.name] = wrapped
245247

246248
return wrapped
247249

@@ -301,12 +303,14 @@ def __init__(
301303
cache_context: bool = False,
302304
iterable: bool = False,
303305
prune_unread_entries: bool = True,
306+
name: Optional[str] = None,
304307
):
305308
super().__init__(
306309
orig,
307310
num_args=num_args,
308311
uncached_args=uncached_args,
309312
cache_context=cache_context,
313+
name=name,
310314
)
311315

312316
if tree and self.num_args < 2:
@@ -321,7 +325,7 @@ def __init__(
321325

322326
def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]:
323327
cache: DeferredCache[CacheKey, Any] = DeferredCache(
324-
name=self.orig.__name__,
328+
name=self.name,
325329
max_entries=self.max_entries,
326330
tree=self.tree,
327331
iterable=self.iterable,
@@ -372,7 +376,7 @@ def _wrapped(*args: Any, **kwargs: Any) -> Any:
372376
wrapped.cache = cache
373377
wrapped.num_args = self.num_args
374378

375-
obj.__dict__[self.orig.__name__] = wrapped
379+
obj.__dict__[self.name] = wrapped
376380

377381
return wrapped
378382

@@ -393,6 +397,7 @@ def __init__(
393397
cached_method_name: str,
394398
list_name: str,
395399
num_args: Optional[int] = None,
400+
name: Optional[str] = None,
396401
):
397402
"""
398403
Args:
@@ -403,7 +408,7 @@ def __init__(
403408
but including list_name) to use as cache keys. Defaults to all
404409
named args of the function.
405410
"""
406-
super().__init__(orig, num_args=num_args, uncached_args=None)
411+
super().__init__(orig, num_args=num_args, uncached_args=None, name=name)
407412

408413
self.list_name = list_name
409414

@@ -525,7 +530,7 @@ def errback_all(f: Failure) -> None:
525530
else:
526531
return defer.succeed(results)
527532

528-
obj.__dict__[self.orig.__name__] = wrapped
533+
obj.__dict__[self.name] = wrapped
529534

530535
return wrapped
531536

@@ -577,6 +582,7 @@ def cached(
577582
cache_context: bool = False,
578583
iterable: bool = False,
579584
prune_unread_entries: bool = True,
585+
name: Optional[str] = None,
580586
) -> Callable[[F], _CachedFunction[F]]:
581587
func = lambda orig: DeferredCacheDescriptor(
582588
orig,
@@ -587,13 +593,18 @@ def cached(
587593
cache_context=cache_context,
588594
iterable=iterable,
589595
prune_unread_entries=prune_unread_entries,
596+
name=name,
590597
)
591598

592599
return cast(Callable[[F], _CachedFunction[F]], func)
593600

594601

595602
def cachedList(
596-
*, cached_method_name: str, list_name: str, num_args: Optional[int] = None
603+
*,
604+
cached_method_name: str,
605+
list_name: str,
606+
num_args: Optional[int] = None,
607+
name: Optional[str] = None,
597608
) -> Callable[[F], _CachedFunction[F]]:
598609
"""Creates a descriptor that wraps a function in a `DeferredCacheListDescriptor`.
599610
@@ -628,6 +639,7 @@ def batch_do_something(self, first_arg, second_args):
628639
cached_method_name=cached_method_name,
629640
list_name=list_name,
630641
num_args=num_args,
642+
name=name,
631643
)
632644

633645
return cast(Callable[[F], _CachedFunction[F]], func)

0 commit comments

Comments
 (0)