@@ -380,6 +380,16 @@ def generate_migrated_groups(self) -> Iterable[MigratedGroup]:
380380 )
381381
382382
class GroupDeletionIncompleteError(RuntimeError):
    """Raised when a workspace group that was deleted is still visible.

    The group's identifier and (optional) display name are kept on the instance so that
    callers can report which deletion has not yet taken effect.
    """

    __slots__ = ("group_id", "display_name")

    def __init__(self, group_id: str, display_name: str | None) -> None:
        # A placeholder stands in for a missing (or empty) display name in the message.
        shown_name = display_name or "<name-missing>"
        super().__init__(f"Group deletion incomplete: {shown_name} (id={group_id} )")
        self.group_id = group_id
        self.display_name = display_name
391+
392+
383393class GroupRenameIncompleteError (RuntimeError ):
384394 __slots__ = ("group_id" , "old_name" , "new_name" )
385395
@@ -530,27 +540,49 @@ def get_migration_state(self) -> MigrationState:
530540 return MigrationState (self .snapshot ())
531541
def delete_original_workspace_groups(self):
    """Delete the original workspace groups of the migrated groups and wait for the deletions to settle.

    Every group in the snapshot whose account-level name is present amongst the account groups in the
    workspace is deleted; the others are skipped with a warning. If no group is eligible, the method
    returns right away without contacting the groups API for deletion or verification.

    Raises:
        ManyError: if any deletion task or any per-group wait task reports an error. Errors raised by
            the final enumeration check propagate unchanged.
    """
    account_groups_in_workspace = self._account_groups_in_workspace()
    migrated_groups = self.snapshot()
    logger.info(f"Starting to remove {len(migrated_groups)} migrated workspace groups...")
    # Group deletion is eventually consistent, and not monotonically consistent, with a rather long time to
    # converge: internally the API caches some things for up to 60s. To avoid excessive wait times when large
    # numbers of groups need to be deleted (some deployments have >10K groups) we use the following steps:
    #  1. Delete the groups.
    #  2. Confirm that direct GETs no longer see the group.
    #  3. Confirm that account enumeration no longer includes the deleted groups.
    deletion_tasks = []
    waiting_tasks = []
    deleted_groups = []
    for migrated_group in migrated_groups:
        if migrated_group.name_in_account not in account_groups_in_workspace:
            logger.warning(
                f"Not deleting group {migrated_group.temporary_name} (id={migrated_group.id_in_workspace} ) (originally {migrated_group.name_in_workspace} ): its migrated account group ({migrated_group.name_in_account} ) cannot be found."
            )
            continue
        deletion_tasks.append(
            functools.partial(
                self._delete_workspace_group, migrated_group.id_in_workspace, migrated_group.temporary_name
            )
        )
        waiting_tasks.append(
            functools.partial(
                self._wait_for_workspace_group_deletion,
                migrated_group.id_in_workspace,
                migrated_group.temporary_name,
            )
        )
        deleted_groups.append(migrated_group)
    if not deleted_groups:
        # Nothing was scheduled for deletion, so there is nothing to verify: skip the (potentially slow)
        # enumeration of all workspace groups that step 3 would otherwise perform.
        return
    # Step 1: Delete the groups.
    _, errors = Threads.gather("removing original workspace groups", deletion_tasks)
    if errors:
        logger.error(f"During deletion of workspace groups got {len(errors)} errors. See debug logs.")
        raise ManyError(errors)
    # Step 2: Confirm that direct gets no longer return the deleted group.
    _, errors = Threads.gather("waiting for removal of original workspace groups", waiting_tasks)
    if errors:
        logger.error(f"Waiting for deletion of workspace groups got {len(errors)} errors. See debug logs. ")
        raise ManyError(errors)
    # Step 3: Confirm that enumeration no longer returns the deleted groups.
    self._wait_for_deleted_workspace_groups(deleted_groups)
554586
555587 def _fetcher (self ) -> Iterable [MigratedGroup ]:
556588 state = []
@@ -672,7 +704,6 @@ def _list_workspace_groups(self, resource_type: str, scim_attributes: str) -> li
672704 # a strategy of enumerating the bare minimum and request full attributes for each group individually.
673705 attributes = scim_attributes .split ("," )
674706 if "members" in attributes :
675- attributes .remove ("members" )
676707 retry_on_internal_error = retried (on = [InternalError ], timeout = self ._verify_timeout )
677708 get_group = retry_on_internal_error (self ._get_group )
678709 # Limit to the attributes we need for determining if the group is out of scope; the rest are fetched later.
@@ -726,16 +757,86 @@ def _list_account_groups(self, scim_attributes: str) -> list[iam.Group]:
726757 sorted_groups : list [iam .Group ] = sorted (account_groups , key = lambda _ : _ .display_name ) # type: ignore[arg-type,return-value]
727758 return sorted_groups
728759
def _delete_workspace_group_and_wait_for_deletion(self, group_id: str, display_name: str) -> str:
    """Delete a workspace group, wait for the deletion to be confirmed, and return the group's id.

    Args:
        group_id: identifier of the workspace group to delete.
        display_name: name of the group, used for logging.
    """
    described = f"{display_name} (id={group_id} )"
    logger.debug(f"Deleting workspace group: {described}")
    self._delete_workspace_group(group_id, display_name)
    logger.debug(f"Waiting for workspace group deletion to take effect: {described}")
    self._wait_for_workspace_group_deletion(group_id, display_name)
    return group_id
766+
@retried(on=[InternalError, ResourceConflict, DeadlineExceeded])
@rate_limited(max_requests=35, burst_period_seconds=60)
def _rate_limited_group_delete_with_retry(self, group_id: str) -> None:
    """Issue the delete request for a workspace group.

    The call is wrapped by the rate limiter and retried on InternalError, ResourceConflict and
    DeadlineExceeded, as configured by the decorators.

    Args:
        group_id: identifier of the workspace group to delete.
    """
    try:
        self._ws.groups.delete(id=group_id)
    except NotFound:
        # A group that cannot be found needs no further deletion; this is not treated as a failure.
        return
774+
def _delete_workspace_group(self, group_id: str, display_name: str) -> None:
    """Log and then request the deletion of a workspace group.

    Args:
        group_id: identifier of the workspace group to delete.
        display_name: name of the group; only used in the log message.
    """
    message = f"Deleting workspace group: {display_name} (id={group_id} )"
    logger.debug(message)
    self._rate_limited_group_delete_with_retry(group_id)
778+
@retried(on=[GroupDeletionIncompleteError], timeout=timedelta(seconds=90))
def _wait_for_workspace_group_deletion(self, group_id: str, display_name: str) -> None:
    """Verify, with retries, that a direct lookup of a deleted workspace group no longer finds it.

    Args:
        group_id: identifier of the workspace group that was deleted.
        display_name: name of the group, used for logging.

    Raises:
        GroupDeletionIncompleteError: from a check that still finds the group; the decorator retries
            on this error with a 90 second timeout.
    """
    # The groups API is eventually consistent, but not monotonically consistent. To compensate, two
    # consecutive checks must both confirm the deletion. REST API internals cache things for up to 60s,
    # and times close to this were seen during testing; the retry timeout reflects this: if it takes
    # much longer then something else is wrong.
    # A group that is still present is reported at DEBUG level by the first check and at WARNING level
    # by the second.
    for still_present_level in (logging.DEBUG, logging.WARNING):
        self._check_workspace_group_deletion(group_id, display_name, still_present_level)
    logger.debug(f"Workspace group is assumed deleted: {display_name} (id={group_id} )")
788+
def _check_workspace_group_deletion(self, group_id: str, display_name: str, still_present_level_level: int) -> None:
    """Look up a deleted workspace group once and fail if it can still be retrieved.

    Args:
        group_id: identifier of the workspace group that was deleted.
        display_name: name of the group, used for logging and in the raised error.
        still_present_level_level: logging level used to report that the group is still present.

    Raises:
        GroupDeletionIncompleteError: if the lookup still returns the group.
    """
    try:
        self._ws.groups.get(id=group_id)
    except NotFound:
        logger.debug(f"Workspace group not found; possibly deleted: {display_name} (id={group_id} )")
        return
    # The lookup succeeded, so the deletion is still pending.
    logger.log(
        still_present_level_level,
        f"Deleted group is still present; still waiting for deletion to take effect: {display_name} (id={group_id} )",
    )
    raise GroupDeletionIncompleteError(group_id, display_name)
800+
@retried(on=[ManyError], timeout=timedelta(minutes=5))
def _wait_for_deleted_workspace_groups(self, deleted_workspace_groups: list[MigratedGroup]) -> None:
    """Verify, with retries, that enumerating workspace groups no longer yields the deleted groups.

    Args:
        deleted_workspace_groups: the migrated groups whose workspace groups were deleted.

    Raises:
        ManyError: from a check that still finds deleted groups; the decorator retries on this error
            with a 5 minute timeout.
    """
    # The groups API is eventually consistent, but not monotonically consistent. To compensate, two
    # consecutive enumerations must both omit every deleted group. REST API internals cache things for
    # up to 60s. The retry timeout reflects this, and the fact that enumeration can take a long time for
    # large numbers of groups. (Currently there is no way to configure the retry handler to retry at
    # least once, so the timeout needs to be high enough to allow at least one retry.)
    # Groups that are still listed are reported at DEBUG level by the first check and at WARNING level
    # by the second.
    for still_present_level in (logging.DEBUG, logging.WARNING):
        self._check_for_deleted_workspace_groups(deleted_workspace_groups, still_present_level)
    deleted_count = len(deleted_workspace_groups)
    logger.debug(f"Group enumeration omitted all {deleted_count} workspace groups; assuming deleted.")
814+
def _check_for_deleted_workspace_groups(
    self, deleted_workspace_groups: list[MigratedGroup], still_present_log_level: int
) -> None:
    """Enumerate the workspace groups once and fail if any deleted group is still listed.

    Args:
        deleted_workspace_groups: the migrated groups whose workspace groups were deleted.
        still_present_log_level: logging level used to report groups that are still listed.

    Raises:
        ManyError: wrapping one GroupDeletionIncompleteError per deleted group that is still listed.
    """
    expected_deletions = {group.id_in_workspace for group in deleted_workspace_groups}
    pending_deletions = []
    # Only the id and display name are requested: they are all that is needed to match and report groups.
    for listed_group in self._list_workspace_groups("WorkspaceGroup", "id,displayName"):
        if listed_group.id in expected_deletions:
            pending_deletions.append(GroupDeletionIncompleteError(listed_group.id, listed_group.display_name))
    if not pending_deletions:
        logger.debug(
            f"Group enumeration does not contain any of the {len(expected_deletions)} deleted workspace groups; possibly deleted."
        )
        return
    # Skip building the per-group messages when the requested level is not enabled.
    if logger.isEnabledFor(still_present_log_level):
        logger.log(
            still_present_log_level,
            f"Group enumeration still contains {len(pending_deletions)} /{len(expected_deletions)} deleted workspace groups.",
        )
        for still_listed in pending_deletions:
            logger.log(
                still_present_log_level,
                f"Group enumeration still contains deleted group: {still_listed.display_name} (id={still_listed.group_id} )",
            )
    raise ManyError(pending_deletions)
739840
740841 @retried (on = [InternalError , ResourceConflict , DeadlineExceeded ])
741842 @rate_limited (max_requests = 5 )
0 commit comments