2525from synapse .server import HomeServer
2626from synapse .storage import DataStore
2727from synapse .storage .background_updates import _BackgroundUpdateHandler
28+ from synapse .storage .databases .main import user_directory
29+ from synapse .storage .databases .main .user_directory import (
30+ _parse_words_with_icu ,
31+ _parse_words_with_regex ,
32+ )
2833from synapse .storage .roommember import ProfileInfo
2934from synapse .util import Clock
3035
BOB = "@bob:b"
BOBBY = "@bobby:a"
# The localpart isn't 'Bela' on purpose so we can test looking up display names.
# The server part is a full domain name so that tests can also search on the
# start of the user ID (e.g. "somenickname:exa").
BELA = "@somenickname:example.org"
4651
4752
4853class GetUserDirectoryTables :
@@ -423,6 +428,8 @@ async def mocked_process_users(*args: Any, **kwargs: Any) -> int:
423428
424429
425430class UserDirectoryStoreTestCase (HomeserverTestCase ):
431+ use_icu = False
432+
426433 def prepare (self , reactor : MemoryReactor , clock : Clock , hs : HomeServer ) -> None :
427434 self .store = hs .get_datastores ().main
428435
@@ -434,6 +441,12 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
434441 self .get_success (self .store .update_profile_in_user_dir (BELA , "Bela" , None ))
435442 self .get_success (self .store .add_users_in_public_rooms ("!room:id" , (ALICE , BOB )))
436443
444+ self ._restore_use_icu = user_directory .USE_ICU
445+ user_directory .USE_ICU = self .use_icu
446+
def tearDown(self) -> None:
    """Undo the `USE_ICU` override installed by `prepare`.

    Restores `user_directory.USE_ICU` to the value that `prepare` saved in
    `self._restore_use_icu`, so the module-level flag does not leak into
    other test cases.
    """
    user_directory.USE_ICU = self._restore_use_icu

    # Chain to the parent class so that any cleanup it performs still runs;
    # overriding `tearDown` without this silently skips it.
    super().tearDown()
449+
437450 def test_search_user_dir (self ) -> None :
438451 # normally when alice searches the directory she should just find
439452 # bob because bobby doesn't share a room with her.
@@ -478,6 +491,26 @@ def test_search_user_dir_stop_words(self) -> None:
478491 {"user_id" : BELA , "display_name" : "Bela" , "avatar_url" : None },
479492 )
480493
@override_config({"user_directory": {"search_all_users": True}})
def test_search_user_dir_start_of_user_id(self) -> None:
    """Check that the start of a user ID is enough to find that user.

    With `search_all_users` enabled, ALICE searches for "somenickname:exa"
    and must get back exactly one, non-limited result: BELA's profile.
    """
    expected_profile = {"user_id": BELA, "display_name": "Bela", "avatar_url": None}

    pending_search = self.store.search_user_dir(ALICE, "somenickname:exa", 10)
    response = self.get_success(pending_search)

    self.assertFalse(response["limited"])

    matches = response["results"]
    self.assertEqual(1, len(matches))
    self.assertDictEqual(matches[0], expected_profile)
506+
507+
class UserDirectoryStoreTestCaseWithIcu(UserDirectoryStoreTestCase):
    """Runs the inherited `UserDirectoryStoreTestCase` tests with `use_icu = True`."""

    # Mark the whole case as skipped when `icu` is falsy (presumably because
    # PyICU could not be imported -- confirm against the module's imports).
    if not icu:
        skip = "Requires PyICU"

    # Read by the parent's `prepare` to set `user_directory.USE_ICU`.
    use_icu = True
513+
481514
482515class UserDirectoryICUTestCase (HomeserverTestCase ):
483516 if not icu :
@@ -513,3 +546,31 @@ def test_icu_word_boundary(self) -> None:
513546 r ["results" ][0 ],
514547 {"user_id" : ALICE , "display_name" : display_name , "avatar_url" : None },
515548 )
549+
def test_icu_word_boundary_punctuation(self) -> None:
    """
    Check how the ICU tokeniser handles punctuation inside words.

    The outcome seems to depend on the underlying version of ICU, so more
    than one tokenisation is accepted.
    """
    tokens = _parse_words_with_icu("lazy'fox jumped:over the.dog")

    # Note: either tokenisation is fine, because Postgres actually splits
    # words itself afterwards.
    acceptable_tokenisations = (
        # ICU 66 on Ubuntu 20.04
        ["lazy'fox", "jumped", "over", "the", "dog"],
        # ICU 70 on Ubuntu 22.04
        ["lazy'fox", "jumped:over", "the.dog"],
    )

    self.assertIn(tokens, acceptable_tokenisations)
568+
def test_regex_word_boundary_punctuation(self) -> None:
    """
    Check how the non-ICU (regex) tokeniser handles punctuation inside words.
    """
    # The expected result splits at every punctuation mark in the phrase.
    expected_tokens = ["lazy", "fox", "jumped", "over", "the", "dog"]

    tokens = _parse_words_with_regex("lazy'fox jumped:over the.dog")

    self.assertEqual(tokens, expected_tokens)
0 commit comments