Skip to content

Commit b654a3a

Browse files
Prefetch all account-level and workspace-level groups (#192)
Addresses #190 and other SCIM-related issues. Conceptually, the following happens: 1. Existing method to list the workspace groups is inconsistent, and the root cause of these issues is somewhere on SCIM API side. We've raised a ticket, but the resolution timing is unclear. 2. To avoid being time-dependent on SCIM API, we re-introduce the listing methods, but this time we intentionally don't use `filter`-based conditions and explicitly split the API methods into two. 3. To avoid reaching out to the SCIM API too frequently, we list the whole API only once and then work with an in-memory data. This PR also adds 100% coverage for `GroupManager` and adds integration tests for groups to verify it works as expected.
1 parent f4e5989 commit b654a3a

File tree

3 files changed

+169
-228
lines changed

3 files changed

+169
-228
lines changed

src/databricks/labs/ucx/managers/group.py

Lines changed: 52 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from functools import partial
55

66
from databricks.sdk import WorkspaceClient
7-
from databricks.sdk.service.iam import Group
7+
from databricks.sdk.service import iam
88
from ratelimit import limits, sleep_and_retry
99

1010
from databricks.labs.ucx.config import GroupsConfig
@@ -30,40 +30,44 @@ def __init__(self, ws: WorkspaceClient, groups: GroupsConfig):
3030
self._ws = ws
3131
self.config = groups
3232
self._migration_state: GroupMigrationState = GroupMigrationState()
33+
self._account_groups = self._list_account_groups()
34+
self._workspace_groups = self._list_workspace_groups()
35+
36+
def _list_workspace_groups(self) -> list[iam.Group]:
37+
logger.debug("Listing workspace groups...")
38+
workspace_groups = [
39+
g
40+
for g in self._ws.groups.list(attributes="id,displayName,meta")
41+
if g.meta.resource_type == "WorkspaceGroup" and g.display_name not in self.SYSTEM_GROUPS
42+
]
43+
logger.debug(f"Found {len(workspace_groups)} workspace groups")
44+
return workspace_groups
45+
46+
def _list_account_groups(self) -> list[iam.Group]:
47+
# TODO: we should avoid using this method, as it's not documented
48+
# unfortunately, there's no other way to consistently get the list of account groups
49+
logger.debug("Listing account groups...")
50+
account_groups = [
51+
iam.Group.from_dict(r)
52+
for r in self._ws.api_client.do(
53+
"get",
54+
"/api/2.0/account/scim/v2/Groups",
55+
query={
56+
"attributes": "id,displayName,meta",
57+
},
58+
).get("Resources", [])
59+
]
60+
account_groups = [g for g in account_groups if g.display_name not in self.SYSTEM_GROUPS]
61+
logger.debug(f"Found {len(account_groups)} account groups")
62+
return account_groups
3363

34-
# please keep the internal methods below this line
35-
36-
def _find_eligible_groups(self) -> list[str]:
37-
logger.info("Finding eligible groups automatically")
38-
_display_name_filter = " and ".join([f'displayName ne "{group}"' for group in GroupManager.SYSTEM_GROUPS])
39-
ws_groups = list(self._ws.groups.list(attributes="displayName,meta", filter=_display_name_filter))
40-
eligible_groups = [g for g in ws_groups if g.meta.resource_type == "WorkspaceGroup"]
41-
logger.info(f"Found {len(eligible_groups)} eligible groups")
42-
return [g.display_name for g in eligible_groups]
64+
def _get_group(self, group_name, level: GroupLevel) -> iam.Group | None:
65+
relevant_level_groups = self._workspace_groups if level == GroupLevel.WORKSPACE else self._account_groups
66+
for group in relevant_level_groups:
67+
if group.display_name == group_name:
68+
return group
4369

44-
@sleep_and_retry
45-
@limits(calls=100, period=1) # assumption
46-
def _list_account_level_groups(
47-
self, filter: str, attributes: str | None = None, excluded_attributes: str | None = None # noqa: A002
48-
) -> list[Group]:
49-
query = {"filter": filter, "attributes": attributes, "excludedAttributes": excluded_attributes}
50-
response = self._ws.api_client.do("GET", "/api/2.0/account/scim/v2/Groups", query=query)
51-
return [Group.from_dict(v) for v in response.get("Resources", [])]
52-
53-
def _get_group(self, group_name, level: GroupLevel) -> Group | None:
54-
# TODO: calling this can cause issues for SCIM backend, cache groups instead
55-
method = self._ws.groups.list if level == GroupLevel.WORKSPACE else self._list_account_level_groups
56-
query_filter = f"displayName eq '{group_name}'"
57-
attributes = ",".join(["id", "displayName", "meta", "entitlements", "roles", "members"])
58-
59-
group = next(
60-
iter(method(filter=query_filter, attributes=attributes)),
61-
None,
62-
)
63-
64-
return group
65-
66-
def _get_or_create_backup_group(self, source_group_name: str, source_group: Group) -> Group:
70+
def _get_or_create_backup_group(self, source_group_name: str, source_group: iam.Group) -> iam.Group:
6771
backup_group_name = f"{self.config.backup_group_prefix}{source_group_name}"
6872
backup_group = self._get_group(backup_group_name, GroupLevel.WORKSPACE)
6973

@@ -78,6 +82,7 @@ def _get_or_create_backup_group(self, source_group_name: str, source_group: Grou
7882
roles=source_group.roles,
7983
members=source_group.members,
8084
)
85+
self._workspace_groups.append(backup_group)
8186
logger.info(f"Backup group {backup_group_name} successfully created")
8287

8388
return backup_group
@@ -101,20 +106,20 @@ def get_group_info(name: str):
101106

102107
def _replace_group(self, migration_info: MigrationGroupInfo):
103108
ws_group = migration_info.workspace
104-
acc_group = migration_info.account
105109

106-
if self._get_group(ws_group.display_name, GroupLevel.WORKSPACE):
107-
logger.info(f"Deleting the workspace-level group {ws_group.display_name} with id {ws_group.id}")
108-
self._ws.groups.delete(ws_group.id)
109-
logger.info(f"Workspace-level group {ws_group.display_name} with id {ws_group.id} was deleted")
110-
else:
111-
logger.warning(f"Workspace-level group {ws_group.display_name} does not exist, skipping")
110+
logger.info(f"Deleting the workspace-level group {ws_group.display_name} with id {ws_group.id}")
111+
self._ws.groups.delete(ws_group.id)
112+
113+
# delete ws_group from the list of workspace groups
114+
self._workspace_groups = [g for g in self._workspace_groups if g.id != ws_group.id]
115+
116+
logger.info(f"Workspace-level group {ws_group.display_name} with id {ws_group.id} was deleted")
112117

113-
self._reflect_account_group_to_workspace(acc_group)
118+
self._reflect_account_group_to_workspace(migration_info.account)
114119

115120
@sleep_and_retry
116121
@limits(calls=5, period=1) # assumption
117-
def _reflect_account_group_to_workspace(self, acc_group: Group) -> None:
122+
def _reflect_account_group_to_workspace(self, acc_group: iam.Group) -> None:
118123
logger.info(f"Reflecting group {acc_group.display_name} to workspace")
119124

120125
# TODO: add OpenAPI spec for it
@@ -136,11 +141,14 @@ def prepare_groups_in_environment(self):
136141

137142
for g in self.config.selected:
138143
assert g not in self.SYSTEM_GROUPS, f"Cannot migrate system group {g}"
144+
assert self._get_group(g, GroupLevel.WORKSPACE), f"Group {g} not found on the workspace level"
145+
assert self._get_group(g, GroupLevel.ACCOUNT), f"Group {g} not found on the account level"
139146

140147
self._set_migration_groups(self.config.selected)
141148
else:
142-
logger.info("No group listing provided, finding eligible groups automatically")
143-
self._set_migration_groups(groups_names=self._find_eligible_groups())
149+
logger.info("No group listing provided, all available workspace-level groups will be used")
150+
available_group_names = [g.display_name for g in self._workspace_groups]
151+
self._set_migration_groups(groups_names=available_group_names)
144152
logger.info("Environment prepared successfully")
145153

146154
@property

tests/integration/test_group.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from databricks.sdk import WorkspaceClient
2+
3+
from databricks.labs.ucx.config import GroupsConfig
4+
from databricks.labs.ucx.managers.group import GroupLevel, GroupManager
5+
6+
7+
def test_group_listing(ws: WorkspaceClient, make_ucx_group):
8+
ws_group, acc_group = make_ucx_group()
9+
manager = GroupManager(ws, GroupsConfig(selected=[ws_group.display_name]))
10+
assert ws_group.display_name in [g.display_name for g in manager._workspace_groups]
11+
assert acc_group.display_name in [g.display_name for g in manager._account_groups]
12+
13+
14+
def test_id_validity(ws: WorkspaceClient, make_ucx_group):
15+
ws_group, acc_group = make_ucx_group()
16+
manager = GroupManager(ws, GroupsConfig(selected=[ws_group.display_name]))
17+
assert ws_group.id == manager._get_group(ws_group.display_name, GroupLevel.WORKSPACE).id
18+
assert acc_group.id == manager._get_group(acc_group.display_name, GroupLevel.ACCOUNT).id

0 commit comments

Comments
 (0)