Skip to content

Commit 0948141

Browse files
authored
Added databricks labs ucx validate-groups-membership command to validate groups to see if they have same membership across acount and workspace level (#772)
1 parent 3fe51c5 commit 0948141

File tree

6 files changed

+201
-1
lines changed

6 files changed

+201
-1
lines changed

labs.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,10 @@ commands:
8686
- name: subscription-id
8787
description: Subscription to scan storage account in
8888

89+
90+
- name: validate-groups-membership
91+
description: Validate the groups to see if the groups at account level and workspace level have different membership
92+
table_template: |-
93+
Workflow Group Name\tAccount Group Name
94+
{{range .}}{{.name_in_workspace}}\t{{.name_in_group}}
95+
{{end}}

src/databricks/labs/ucx/cli.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from databricks.labs.ucx.hive_metastore.table_migrate import TableMove, TablesMigrate
1919
from databricks.labs.ucx.install import WorkspaceInstaller
2020
from databricks.labs.ucx.installer import InstallationManager
21+
from databricks.labs.ucx.workspace_access.groups import GroupManager
2122

2223
ucx = App(__file__)
2324
logger = get_logger(__file__)
@@ -142,6 +143,37 @@ def repair_run(w: WorkspaceClient, step):
142143
installer.repair_run(step)
143144

144145

146+
@ucx.command
147+
def validate_groups_membership(w: WorkspaceClient):
148+
"""Validate the groups to see if the groups at account level and workspace level has different membership"""
149+
installation_manager = InstallationManager(w)
150+
installation = installation_manager.for_user(w.current_user.me())
151+
if not installation:
152+
logger.error(CANT_FIND_UCX_MSG)
153+
return None
154+
warehouse_id = installation.config.warehouse_id
155+
inventory_database = installation.config.inventory_database
156+
renamed_group_prefix = installation.config.renamed_group_prefix
157+
workspace_group_regex = installation.config.workspace_group_regex
158+
workspace_group_replace = installation.config.workspace_group_replace
159+
account_group_regex = installation.config.account_group_regex
160+
include_group_names = installation.config.include_group_names
161+
sql_backend = StatementExecutionBackend(w, warehouse_id)
162+
logger.info("Validating Groups which are having different memberships between account and workspace")
163+
group_manager = GroupManager(
164+
sql_backend=sql_backend,
165+
ws=w,
166+
inventory_database=inventory_database,
167+
include_group_names=include_group_names,
168+
renamed_group_prefix=renamed_group_prefix,
169+
workspace_group_regex=workspace_group_regex,
170+
workspace_group_replace=workspace_group_replace,
171+
account_group_regex=account_group_regex,
172+
)
173+
mismatch_groups = group_manager.validate_group_membership()
174+
print(json.dumps(mismatch_groups))
175+
176+
145177
@ucx.command
146178
def revert_migrated_tables(w: WorkspaceClient, schema: str, table: str, *, delete_managed: bool = False):
147179
"""remove notation on a migrated table for re-migration"""

src/databricks/labs/ucx/workspace_access/groups.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,33 @@ def _crawler(self) -> Iterable[MigratedGroup]:
406406
strategy = self._get_strategy(workspace_groups_in_workspace, account_groups_in_account)
407407
yield from strategy.generate_migrated_groups()
408408

409+
def validate_group_membership(self) -> list[dict]:
410+
workspace_groups_in_workspace = self._workspace_groups_in_workspace()
411+
account_groups_in_account = self._account_groups_in_account()
412+
strategy = self._get_strategy(workspace_groups_in_workspace, account_groups_in_account)
413+
migrated_groups = strategy.generate_migrated_groups()
414+
mismatch_group = []
415+
for groups in migrated_groups:
416+
ws_members_set = set([m.get("display") for m in json.loads(groups.members)] if groups.members else [])
417+
account_group = account_groups_in_account[groups.name_in_account]
418+
ac_members_set = set(
419+
[a.as_dict().get("display") for a in account_group.members] if account_group.members else []
420+
)
421+
set_diff = (ws_members_set - ac_members_set).union(ac_members_set - ws_members_set)
422+
if not set_diff:
423+
continue
424+
mismatch_group.append(
425+
{
426+
"wf_group_name": groups.name_in_workspace,
427+
"ac_group_name": groups.name_in_account,
428+
}
429+
)
430+
if not mismatch_group:
431+
logger.info("There are no groups with different membership between account and workspace")
432+
else:
433+
logger.info("There are groups with different membership between account and workspace")
434+
return mismatch_group
435+
409436
def _workspace_groups_in_workspace(self) -> dict[str, Group]:
410437
attributes = "id,displayName,meta,externalId,members,roles,entitlements"
411438
groups = {}

tests/integration/workspace_access/test_groups.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,38 @@ def test_group_matching_names(ws, sql_backend, inventory_schema, make_ucx_group,
206206
validate_migrate_groups(group_manager, ws_group, accnt_group)
207207

208208

209+
@retried(on=[NotFound], timeout=timedelta(minutes=2))
210+
def test_group_matching_names_with_diff_users(
211+
ws, sql_backend, inventory_schema, make_random, make_user, make_group, make_acc_group
212+
):
213+
rand_elem = make_random(4)
214+
workspace_group_name = f"test_group_{rand_elem}"
215+
account_group_name = f"same_group_[{rand_elem}]"
216+
user1 = make_user()
217+
user2 = make_user()
218+
members1 = [user1.id]
219+
members2 = [user2.id]
220+
ws_group = make_group(display_name=workspace_group_name, members=members1, entitlements=["allow-cluster-create"])
221+
accnt_group = make_acc_group(display_name=account_group_name, members=members2)
222+
223+
logger.info(
224+
f"Attempting Mapping From Workspace Group {ws_group.display_name} to "
225+
f"Account Group {accnt_group.display_name}"
226+
)
227+
group_manager = GroupManager(
228+
sql_backend,
229+
ws,
230+
inventory_schema,
231+
[ws_group.display_name],
232+
"ucx-temp-",
233+
workspace_group_regex=r"([0-9a-zA-Z]*)$",
234+
account_group_regex=r"\[([0-9a-zA-Z]*)\]",
235+
)
236+
237+
t = group_manager.validate_group_membership()
238+
assert len(t) > 0
239+
240+
209241
# average runtime is 100 seconds
210242
@retried(on=[NotFound], timeout=timedelta(minutes=3))
211243
def test_replace_workspace_groups_with_account_groups(

tests/unit/test_cli.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,11 @@
1919
skip,
2020
sync_workspace_info,
2121
validate_external_locations,
22+
validate_groups_membership,
2223
workflows,
2324
)
25+
from databricks.labs.ucx.config import WorkspaceConfig
26+
from databricks.labs.ucx.installer import Installation
2427

2528

2629
def test_workflow(caplog):
@@ -320,3 +323,41 @@ def test_save_azure_storage_accounts(mocker, caplog):
320323
) as m:
321324
save_azure_storage_accounts(w, "test")
322325
m.assert_called_once()
326+
327+
328+
def test_validate_groups_membership(mocker, caplog):
329+
w = create_autospec(WorkspaceClient)
330+
inst_data = Installation(
331+
config=WorkspaceConfig(
332+
inventory_database="test_database",
333+
workspace_group_regex=None,
334+
workspace_group_replace=None,
335+
account_group_regex=None,
336+
group_match_by_external_id=False,
337+
include_group_names=None,
338+
renamed_group_prefix="db-temp-",
339+
warehouse_id="test_id",
340+
database_to_catalog_mapping=None,
341+
default_catalog="ucx_default",
342+
),
343+
user="test_user",
344+
path="/Users/[email protected]/.ucx",
345+
)
346+
w.current_user.me = lambda: iam.User(user_name="[email protected]", groups=[iam.ComplexValue(display="admins")])
347+
mocker.patch("databricks.labs.ucx.installer.InstallationManager.__init__", return_value=None)
348+
mocker.patch("databricks.labs.ucx.installer.InstallationManager.for_user", return_value=inst_data)
349+
mocker.patch("databricks.labs.ucx.workspace_access.groups.GroupManager.__init__", return_value=None)
350+
mocker.patch(
351+
"databricks.labs.ucx.workspace_access.groups.GroupManager.validate_group_membership",
352+
return_value={"wf_group_name": "test_group", "ac_group_name": "test_group"},
353+
)
354+
validate_groups_membership(w)
355+
assert caplog.messages == ["Validating Groups which are having different memberships between account and workspace"]
356+
357+
358+
def test_validate_group_no_ucx(mocker, caplog):
359+
w = create_autospec(WorkspaceClient)
360+
w.current_user.me = lambda: iam.User(user_name="test_user", groups=[iam.ComplexValue(display="admins")])
361+
mocker.patch("databricks.labs.ucx.installer.InstallationManager.for_user", return_value=None)
362+
validate_groups_membership(w)
363+
assert "Couldn't find UCX configuration" in caplog.messages[0]

tests/unit/workspace_access/test_groups.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
import json
2-
from unittest.mock import MagicMock
2+
from unittest.mock import MagicMock, create_autospec
33

44
import pytest
55
from _pytest.outcomes import fail
66
from databricks.labs.blueprint.parallel import ManyError
77
from databricks.labs.blueprint.tui import MockPrompts
8+
from databricks.sdk import WorkspaceClient
89
from databricks.sdk.errors import DatabricksError, ResourceDoesNotExist
910
from databricks.sdk.service import iam
1011
from databricks.sdk.service.iam import ComplexValue, Group, ResourceMeta
1112

13+
from databricks.labs.ucx.framework.crawlers import SqlBackend
1214
from databricks.labs.ucx.workspace_access.groups import (
1315
ConfigureGroups,
1416
GroupManager,
@@ -824,3 +826,62 @@ def test_state():
824826
assert not state.get_target_principal("invalid_group_name")
825827
assert not state.get_temp_principal("invalid_group_name")
826828
assert not state.is_in_scope("invalid_group_name")
829+
830+
831+
def test_validate_group_diff_membership():
832+
backend = create_autospec(SqlBackend)
833+
wsclient = create_autospec(WorkspaceClient)
834+
group = Group(
835+
id="1",
836+
external_id="1234",
837+
display_name="test_(1234)",
838+
meta=ResourceMeta(resource_type="WorkspaceGroup"),
839+
members=[ComplexValue(display="test-user-1", value="20"), ComplexValue(display="test-user-2", value="21")],
840+
roles=[
841+
ComplexValue(value="arn:aws:iam::123456789098:instance-profile/ip1"),
842+
ComplexValue(value="arn:aws:iam::123456789098:instance-profile/ip2"),
843+
],
844+
entitlements=[ComplexValue(value="allow-cluster-create"), ComplexValue(value="allow-instance-pool-create")],
845+
)
846+
wsclient.groups.list.return_value = [group]
847+
wsclient.groups.get.return_value = group
848+
account_admins_group = Group(id="1234", external_id="1234", display_name="ac_test_1234")
849+
wsclient.api_client.do.return_value = {
850+
"Resources": [g.as_dict() for g in [account_admins_group]],
851+
}
852+
grp_membership = GroupManager(
853+
backend, wsclient, inventory_database="inv", workspace_group_regex=r"\(([1-9]+)\)", account_group_regex="[1-9]+"
854+
).validate_group_membership()
855+
assert grp_membership == [{"wf_group_name": "test_(1234)", "ac_group_name": "ac_test_1234"}]
856+
857+
858+
def test_validate_group_same_membership():
859+
backend = MockBackend()
860+
wsclient = MagicMock()
861+
group = Group(
862+
id="1",
863+
external_id="1234",
864+
display_name="test_(1234)",
865+
meta=ResourceMeta(resource_type="WorkspaceGroup"),
866+
members=[ComplexValue(display="test-user-1", value="01"), ComplexValue(display="test-user-2", value="02")],
867+
roles=[
868+
ComplexValue(value="arn:aws:iam::123456789098:instance-profile/test_ip1"),
869+
ComplexValue(value="arn:aws:iam::123456789098:instance-profile/test_ip2"),
870+
],
871+
entitlements=[ComplexValue(value="allow-cluster-create"), ComplexValue(value="allow-instance-pool-create")],
872+
)
873+
wsclient.groups.list.return_value = [group]
874+
wsclient.groups.get.return_value = group
875+
account_admins_group = Group(
876+
id="1234",
877+
external_id="1234",
878+
display_name="ac_test_1234",
879+
members=[ComplexValue(display="test-user-1", value="01"), ComplexValue(display="test-user-2", value="02")],
880+
)
881+
wsclient.api_client.do.return_value = {
882+
"Resources": [g.as_dict() for g in [account_admins_group]],
883+
}
884+
grp_membership = GroupManager(
885+
backend, wsclient, inventory_database="inv", workspace_group_regex=r"\(([1-9]+)\)", account_group_regex="[1-9]+"
886+
).validate_group_membership()
887+
assert grp_membership == []

0 commit comments

Comments
 (0)