1717import  unicodedata 
1818from  typing  import  (
1919    TYPE_CHECKING ,
20+     Collection ,
2021    Iterable ,
2122    List ,
2223    Mapping ,
4546if  TYPE_CHECKING :
4647    from  synapse .server  import  HomeServer 
4748
48- from  synapse .api .constants  import  EventTypes , HistoryVisibility , JoinRules 
49+ from  synapse .api .constants  import  EventTypes , HistoryVisibility , JoinRules ,  UserTypes 
4950from  synapse .storage .database  import  (
5051    DatabasePool ,
5152    LoggingDatabaseConnection ,
@@ -356,13 +357,30 @@ async def _populate_user_directory_process_users(
356357        Add all local users to the user directory. 
357358        """ 
358359
359-         def  _get_next_batch (txn : LoggingTransaction ) ->  Optional [List [str ]]:
360-             sql  =  "SELECT user_id FROM %s LIMIT %s"  %  (
361-                 TEMP_TABLE  +  "_users" ,
362-                 str (batch_size ),
363-             )
364-             txn .execute (sql )
365-             user_result  =  cast (List [Tuple [str ]], txn .fetchall ())
360+         def  _populate_user_directory_process_users_txn (
361+             txn : LoggingTransaction ,
362+         ) ->  Optional [int ]:
363+             if  self .database_engine .supports_returning :
364+                 # Note: we use an ORDER BY in the SELECT to force usage of an 
365+                 # index. Otherwise, postgres does a sequential scan that is 
366+                 # surprisingly slow (I think due to the fact it will read/skip 
367+                 # over lots of already deleted rows). 
368+                 sql  =  f""" 
369+                     DELETE FROM { TEMP_TABLE  +  "_users" }  
370+                     WHERE user_id IN ( 
371+                         SELECT user_id FROM { TEMP_TABLE  +  "_users" }   ORDER BY user_id LIMIT ? 
372+                     ) 
373+                     RETURNING user_id 
374+                 """ 
375+                 txn .execute (sql , (batch_size ,))
376+                 user_result  =  cast (List [Tuple [str ]], txn .fetchall ())
377+             else :
378+                 sql  =  "SELECT user_id FROM %s ORDER BY user_id LIMIT %s"  %  (
379+                     TEMP_TABLE  +  "_users" ,
380+                     str (batch_size ),
381+                 )
382+                 txn .execute (sql )
383+                 user_result  =  cast (List [Tuple [str ]], txn .fetchall ())
366384
367385            if  not  user_result :
368386                return  None 
@@ -378,85 +396,81 @@ def _get_next_batch(txn: LoggingTransaction) -> Optional[List[str]]:
378396                assert  count_result  is  not   None 
379397                progress ["remaining" ] =  count_result [0 ]
380398
381-             return  users_to_work_on 
382- 
383-         users_to_work_on  =  await  self .db_pool .runInteraction (
384-             "populate_user_directory_temp_read" , _get_next_batch 
385-         )
399+             if  not  users_to_work_on :
400+                 return  None 
386401
387-         # No more users -- complete the transaction. 
388-         if   not   users_to_work_on : 
389-             await   self . db_pool . updates . _end_background_update ( 
390-                 "populate_user_directory_process_users" 
402+              logger . debug ( 
403+                  "Processing the next %d users of %d remaining" , 
404+                  len ( users_to_work_on ), 
405+                 progress [ "remaining" ], 
391406            )
392-             return  1 
393- 
394-         logger .debug (
395-             "Processing the next %d users of %d remaining" 
396-             %  (len (users_to_work_on ), progress ["remaining" ])
397-         )
398407
399-         # First filter down to users we want to insert into the user directory. 
400-         users_to_insert  =  [
401-             user_id 
402-             for  user_id  in  users_to_work_on 
403-             if  await  self .should_include_local_user_in_dir (user_id )
404-         ]
408+             # First filter down to users we want to insert into the user directory. 
409+             users_to_insert  =  self ._filter_local_users_for_dir_txn (
410+                 txn , users_to_work_on 
411+             )
405412
406-         # Next fetch their profiles. Note that the `user_id` here is the 
407-         # *localpart*, and that not all users have profiles. 
408-         profile_rows  =  await  self .db_pool .simple_select_many_batch (
409-             table = "profiles" ,
410-             column = "user_id" ,
411-             iterable = [get_localpart_from_id (u ) for  u  in  users_to_insert ],
412-             retcols = (
413-                 "user_id" ,
414-                 "displayname" ,
415-                 "avatar_url" ,
416-             ),
417-             keyvalues = {},
418-             desc = "populate_user_directory_process_users_get_profiles" ,
419-         )
420-         profiles  =  {
421-             f"@{ row ['user_id' ]}  :{ self .server_name }  " : _UserDirProfile (
422-                 f"@{ row ['user_id' ]}  :{ self .server_name }  " ,
423-                 row ["displayname" ],
424-                 row ["avatar_url" ],
413+             # Next fetch their profiles. Note that the `user_id` here is the 
414+             # *localpart*, and that not all users have profiles. 
415+             profile_rows  =  self .db_pool .simple_select_many_txn (
416+                 txn ,
417+                 table = "profiles" ,
418+                 column = "user_id" ,
419+                 iterable = [get_localpart_from_id (u ) for  u  in  users_to_insert ],
420+                 retcols = (
421+                     "user_id" ,
422+                     "displayname" ,
423+                     "avatar_url" ,
424+                 ),
425+                 keyvalues = {},
425426            )
426-             for  row  in  profile_rows 
427-         }
427+             profiles  =  {
428+                 f"@{ row ['user_id' ]}  :{ self .server_name }  " : _UserDirProfile (
429+                     f"@{ row ['user_id' ]}  :{ self .server_name }  " ,
430+                     row ["displayname" ],
431+                     row ["avatar_url" ],
432+                 )
433+                 for  row  in  profile_rows 
434+             }
428435
429-         profiles_to_insert  =  [
430-             profiles .get (user_id ) or  _UserDirProfile (user_id )
431-             for  user_id  in  users_to_insert 
432-         ]
436+             profiles_to_insert  =  [
437+                 profiles .get (user_id ) or  _UserDirProfile (user_id )
438+                 for  user_id  in  users_to_insert 
439+             ]
440+ 
441+             # Actually insert the users with their profiles into the directory. 
442+             self ._update_profiles_in_user_dir_txn (txn , profiles_to_insert )
443+ 
444+             # We've finished processing the users. Delete it from the table, if 
445+             # we haven't already. 
446+             if  not  self .database_engine .supports_returning :
447+                 self .db_pool .simple_delete_many_txn (
448+                     txn ,
449+                     table = TEMP_TABLE  +  "_users" ,
450+                     column = "user_id" ,
451+                     values = users_to_work_on ,
452+                     keyvalues = {},
453+                 )
433454
434-         # Actually insert the users with their profiles into  the directory .
435-         await   self . db_pool . runInteraction ( 
436-             "populate_user_directory_process_users_insertion" , 
437-             self . _update_profiles_in_user_dir_txn , 
438-             profiles_to_insert , 
439-         )
455+              # Update  the remaining counter .
456+              progress [ "remaining" ]  -=   len ( users_to_work_on ) 
457+             self . db_pool . updates . _background_update_progress_txn ( 
458+                  txn ,  "populate_user_directory_process_users" ,  progress 
459+             ) 
460+              return   len ( users_to_work_on )
440461
441-         # We've finished processing the users. Delete it from the table. 
442-         await  self .db_pool .simple_delete_many (
443-             table = TEMP_TABLE  +  "_users" ,
444-             column = "user_id" ,
445-             iterable = users_to_work_on ,
446-             keyvalues = {},
447-             desc = "populate_user_directory_process_users_delete" ,
462+         processed_count  =  await  self .db_pool .runInteraction (
463+             "populate_user_directory_temp" , _populate_user_directory_process_users_txn 
448464        )
449465
450-         # Update the remaining counter. 
451-         progress ["remaining" ] -=  len (users_to_work_on )
452-         await  self .db_pool .runInteraction (
453-             "populate_user_directory" ,
454-             self .db_pool .updates ._background_update_progress_txn ,
455-             "populate_user_directory_process_users" ,
456-             progress ,
457-         )
466+         # No more users -- complete the transaction. 
467+         if  not  processed_count :
468+             await  self .db_pool .updates ._end_background_update (
469+                 "populate_user_directory_process_users" 
470+             )
471+             return  1 
458472
459-         return  len ( users_to_work_on ) 
473+         return  processed_count 
460474
461475    async  def  should_include_local_user_in_dir (self , user : str ) ->  bool :
462476        """Certain classes of local user are omitted from the user directory. 
@@ -494,6 +508,30 @@ async def should_include_local_user_in_dir(self, user: str) -> bool:
494508
495509        return  True 
496510
511+     def  _filter_local_users_for_dir_txn (
512+         self , txn : LoggingTransaction , users : Collection [str ]
513+     ) ->  Collection [str ]:
514+         """A batched version of `should_include_local_user_in_dir`""" 
515+         users  =  [
516+             user 
517+             for  user  in  users 
518+             if  self .get_app_service_by_user_id (user ) is  None   # type: ignore[attr-defined] 
519+             and  not  self .get_if_app_services_interested_in_user (user )  # type: ignore[attr-defined] 
520+         ]
521+ 
522+         rows  =  self .db_pool .simple_select_many_txn (
523+             txn ,
524+             table = "users" ,
525+             column = "name" ,
526+             iterable = users ,
527+             keyvalues = {
528+                 "deactivated" : 0 ,
529+             },
530+             retcols = ("name" , "user_type" ),
531+         )
532+ 
533+         return  [row ["name" ] for  row  in  rows  if  row ["user_type" ] !=  UserTypes .SUPPORT ]
534+ 
497535    async  def  is_room_world_readable_or_publicly_joinable (self , room_id : str ) ->  bool :
498536        """Check if the room is either world_readable or publically joinable""" 
499537
0 commit comments