Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 318162f

Browse files
author
David Robertson
authored
Easy refactors of the user directory (#10789)
No functional changes here. This came out as I was working to tackle #5677
1 parent c6f5fb5 commit 318162f

File tree

8 files changed

+90
-37
lines changed

8 files changed

+90
-37
lines changed

changelog.d/10789.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve internal details of the user directory code.

docs/user_directory.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,40 @@ DB corruption) get stale or out of sync. If this happens, for now the
1010
solution to fix it is to execute the SQL [here](https://github.com/matrix-org/synapse/blob/master/synapse/storage/schema/main/delta/53/user_dir_populate.sql)
1111
and then restart synapse. This should then start a background task to
1212
flush the current tables and regenerate the directory.
13+
14+
Data model
15+
----------
16+
17+
There are five relevant tables that collectively form the "user directory".
18+
Three of them track a master list of all the users we could search for.
19+
The last two (collectively called the "search tables") track who can
20+
see who.
21+
22+
From all of these tables we exclude three types of local user:
23+
- support users
24+
- appservice users
25+
- deactivated users
26+
27+
* `user_directory`. This contains the user_id, display name and avatar we'll
28+
return when you search the directory.
29+
- Because there's only one directory entry per user, it's important that we only
30+
ever put publicly visible names here. Otherwise we might leak a private
31+
nickname or avatar used in a private room.
32+
- Indexed on rooms. Indexed on users.
33+
34+
* `user_directory_search`. To be joined to `user_directory`. It contains an extra
35+
column that enables full text search based on user ids and display names.
36+
Different schemas for SQLite and Postgres with different code paths to match.
37+
- Indexed on the full text search data. Indexed on users.
38+
39+
* `user_directory_stream_pos`. When the initial background update to populate
40+
the directory is complete, we record a stream position here. This indicates
41+
that synapse should now listen for room changes and incrementally update
42+
the directory where necessary.
43+
44+
* `users_in_public_rooms`. Contains associations between users and the public rooms they're in.
45+
Used to determine which users are in public rooms and should be publicly visible in the directory.
46+
47+
* `users_who_share_private_rooms`. Rows are triples `(L, M, room id)` where `L`
48+
is a local user and `M` is a local or remote user. `L` and `M` should be
49+
different, but this isn't enforced by a constraint.

synapse/handlers/deactivate_account.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ async def deactivate_account(
131131
await self.store.add_user_pending_deactivation(user_id)
132132

133133
# delete from user directory
134-
await self.user_directory_handler.handle_user_deactivated(user_id)
134+
await self.user_directory_handler.handle_local_user_deactivated(user_id)
135135

136136
# Mark the user as erased, if they asked for that
137137
if erase_data:

synapse/handlers/state_deltas.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import logging
16+
from enum import Enum, auto
1617
from typing import TYPE_CHECKING, Optional
1718

1819
if TYPE_CHECKING:
@@ -21,6 +22,12 @@
2122
logger = logging.getLogger(__name__)
2223

2324

25+
class MatchChange(Enum):
26+
no_change = auto()
27+
now_true = auto()
28+
now_false = auto()
29+
30+
2431
class StateDeltasHandler:
2532
def __init__(self, hs: "HomeServer"):
2633
self.store = hs.get_datastore()
@@ -31,18 +38,12 @@ async def _get_key_change(
3138
event_id: Optional[str],
3239
key_name: str,
3340
public_value: str,
34-
) -> Optional[bool]:
41+
) -> MatchChange:
3542
"""Given two events check if the `key_name` field in content changed
3643
from not matching `public_value` to doing so.
3744
3845
For example, check if `history_visibility` (`key_name`) changed from
3946
`shared` to `world_readable` (`public_value`).
40-
41-
Returns:
42-
None if the field in the events either both match `public_value`
43-
or if neither do, i.e. there has been no change.
44-
True if it didn't match `public_value` but now does
45-
False if it did match `public_value` but now doesn't
4647
"""
4748
prev_event = None
4849
event = None
@@ -54,7 +55,7 @@ async def _get_key_change(
5455

5556
if not event and not prev_event:
5657
logger.debug("Neither event exists: %r %r", prev_event_id, event_id)
57-
return None
58+
return MatchChange.no_change
5859

5960
prev_value = None
6061
value = None
@@ -68,8 +69,8 @@ async def _get_key_change(
6869
logger.debug("prev_value: %r -> value: %r", prev_value, value)
6970

7071
if value == public_value and prev_value != public_value:
71-
return True
72+
return MatchChange.now_true
7273
elif value != public_value and prev_value == public_value:
73-
return False
74+
return MatchChange.now_false
7475
else:
75-
return None
76+
return MatchChange.no_change

synapse/handlers/user_directory.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
import synapse.metrics
1919
from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, Membership
20-
from synapse.handlers.state_deltas import StateDeltasHandler
20+
from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler
2121
from synapse.metrics.background_process_metrics import run_as_background_process
2222
from synapse.storage.roommember import ProfileInfo
2323
from synapse.types import JsonDict
@@ -30,14 +30,26 @@
3030

3131

3232
class UserDirectoryHandler(StateDeltasHandler):
33-
"""Handles querying of and keeping updated the user_directory.
33+
"""Handles queries and updates for the user_directory.
3434
3535
N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY
3636
37-
The user directory is filled with users who this server can see are joined to a
38-
world_readable or publicly joinable room. We keep a database table up to date
39-
by streaming changes of the current state and recalculating whether users should
40-
be in the directory or not when necessary.
37+
When a local user searches the user_directory, we report two kinds of users:
38+
39+
- users this server can see are joined to a world_readable or publicly
40+
joinable room, and
41+
- users belonging to a private room shared by that local user.
42+
43+
The two cases are tracked separately in the `users_in_public_rooms` and
44+
`users_who_share_private_rooms` tables. Both kinds of users have their
45+
username and avatar tracked in a `user_directory` table.
46+
47+
This handler has three responsibilities:
48+
1. Forwarding requests to `/user_directory/search` to the UserDirectoryStore.
49+
2. Providing hooks for the application to call when local users are added,
50+
removed, or have their profile changed.
51+
3. Listening for room state changes that indicate remote users have
52+
joined or left a room, or that their profile has changed.
4153
"""
4254

4355
def __init__(self, hs: "HomeServer"):
@@ -130,7 +142,7 @@ async def handle_local_profile_change(
130142
user_id, profile.display_name, profile.avatar_url
131143
)
132144

133-
async def handle_user_deactivated(self, user_id: str) -> None:
145+
async def handle_local_user_deactivated(self, user_id: str) -> None:
134146
"""Called when a user ID is deactivated"""
135147
# FIXME(#3714): We should probably do this in the same worker as all
136148
# the other changes.
@@ -196,7 +208,7 @@ async def _handle_deltas(self, deltas: List[Dict[str, Any]]) -> None:
196208
public_value=Membership.JOIN,
197209
)
198210

199-
if change is False:
211+
if change is MatchChange.now_false:
200212
# Need to check if the server left the room entirely, if so
201213
# we might need to remove all the users in that room
202214
is_in_room = await self.store.is_host_joined(
@@ -219,14 +231,14 @@ async def _handle_deltas(self, deltas: List[Dict[str, Any]]) -> None:
219231

220232
is_support = await self.store.is_support_user(state_key)
221233
if not is_support:
222-
if change is None:
234+
if change is MatchChange.no_change:
223235
# Handle any profile changes
224236
await self._handle_profile_change(
225237
state_key, room_id, prev_event_id, event_id
226238
)
227239
continue
228240

229-
if change: # The user joined
241+
if change is MatchChange.now_true: # The user joined
230242
event = await self.store.get_event(event_id, allow_none=True)
231243
# It isn't expected for this event to not exist, but we
232244
# don't want the entire background process to break.
@@ -263,24 +275,22 @@ async def _handle_room_publicity_change(
263275
logger.debug("Handling change for %s: %s", typ, room_id)
264276

265277
if typ == EventTypes.RoomHistoryVisibility:
266-
change = await self._get_key_change(
278+
publicness = await self._get_key_change(
267279
prev_event_id,
268280
event_id,
269281
key_name="history_visibility",
270282
public_value=HistoryVisibility.WORLD_READABLE,
271283
)
272284
elif typ == EventTypes.JoinRules:
273-
change = await self._get_key_change(
285+
publicness = await self._get_key_change(
274286
prev_event_id,
275287
event_id,
276288
key_name="join_rule",
277289
public_value=JoinRules.PUBLIC,
278290
)
279291
else:
280292
raise Exception("Invalid event type")
281-
# If change is None, no change. True => become world_readable/public,
282-
# False => was world_readable/public
283-
if change is None:
293+
if publicness is MatchChange.no_change:
284294
logger.debug("No change")
285295
return
286296

@@ -290,13 +300,13 @@ async def _handle_room_publicity_change(
290300
room_id
291301
)
292302

293-
logger.debug("Change: %r, is_public: %r", change, is_public)
303+
logger.debug("Change: %r, publicness: %r", publicness, is_public)
294304

295-
if change and not is_public:
305+
if publicness is MatchChange.now_true and not is_public:
296306
# If we became world readable but room isn't currently public then
297307
# we ignore the change
298308
return
299-
elif not change and is_public:
309+
elif publicness is MatchChange.now_false and is_public:
300310
# If we stopped being world readable but are still public,
301311
# ignore the change
302312
return

synapse/storage/databases/main/roommember.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,11 @@ async def get_users_in_room_with_profiles(
196196
) -> Dict[str, ProfileInfo]:
197197
"""Get a mapping from user ID to profile information for all users in a given room.
198198
199+
The profile information comes directly from this room's `m.room.member`
200+
events, and so may be specific to this room rather than part of a user's
201+
global profile. To avoid privacy leaks, the profile data should only be
202+
revealed to users who are already in this room.
203+
199204
Args:
200205
room_id: The ID of the room to retrieve the users of.
201206

synapse/storage/databases/main/user_directory.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,6 @@ def _get_next_batch(txn):
196196
)
197197

198198
users_with_profile = await self.get_users_in_room_with_profiles(room_id)
199-
user_ids = set(users_with_profile)
200199

201200
# Update each user in the user directory.
202201
for user_id, profile in users_with_profile.items():
@@ -207,7 +206,7 @@ def _get_next_batch(txn):
207206
to_insert = set()
208207

209208
if is_public:
210-
for user_id in user_ids:
209+
for user_id in users_with_profile:
211210
if self.get_if_app_services_interested_in_user(user_id):
212211
continue
213212

@@ -217,14 +216,14 @@ def _get_next_batch(txn):
217216
await self.add_users_in_public_rooms(room_id, to_insert)
218217
to_insert.clear()
219218
else:
220-
for user_id in user_ids:
219+
for user_id in users_with_profile:
221220
if not self.hs.is_mine_id(user_id):
222221
continue
223222

224223
if self.get_if_app_services_interested_in_user(user_id):
225224
continue
226225

227-
for other_user_id in user_ids:
226+
for other_user_id in users_with_profile:
228227
if user_id == other_user_id:
229228
continue
230229

tests/handlers/test_user_directory.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def test_handle_local_profile_change_with_deactivated_user(self):
9494

9595
# deactivate user
9696
self.get_success(self.store.set_user_deactivated_status(r_user_id, True))
97-
self.get_success(self.handler.handle_user_deactivated(r_user_id))
97+
self.get_success(self.handler.handle_local_user_deactivated(r_user_id))
9898

9999
# profile is not in directory
100100
profile = self.get_success(self.store.get_user_in_directory(r_user_id))
@@ -118,7 +118,7 @@ def test_handle_user_deactivated_support_user(self):
118118
)
119119

120120
self.store.remove_from_user_dir = Mock(return_value=defer.succeed(None))
121-
self.get_success(self.handler.handle_user_deactivated(s_user_id))
121+
self.get_success(self.handler.handle_local_user_deactivated(s_user_id))
122122
self.store.remove_from_user_dir.not_called()
123123

124124
def test_handle_user_deactivated_regular_user(self):
@@ -127,7 +127,7 @@ def test_handle_user_deactivated_regular_user(self):
127127
self.store.register_user(user_id=r_user_id, password_hash=None)
128128
)
129129
self.store.remove_from_user_dir = Mock(return_value=defer.succeed(None))
130-
self.get_success(self.handler.handle_user_deactivated(r_user_id))
130+
self.get_success(self.handler.handle_local_user_deactivated(r_user_id))
131131
self.store.remove_from_user_dir.called_once_with(r_user_id)
132132

133133
def test_private_room(self):

0 commit comments

Comments
 (0)