Skip to content

Commit aaf7c00

Browse files
authored
Extended test suite for HMS->HMS TACL migration (#439)
1 parent 9479258 commit aaf7c00

File tree

9 files changed

+154
-25
lines changed

9 files changed

+154
-25
lines changed

docs/local-group-migration.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,4 +244,21 @@ from databricks.labs.ucx.config import GroupsConfig
244244

245245
group_manager = GroupManager(ws, GroupsConfig(auto=True))
246246
group_manager.ws_local_group_deletion_recovery()
247+
```
248+
249+
3. Recover Table ACL from `$inventory.grants` to `$inventory.permissions`:
250+
251+
```python
252+
from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler
253+
from databricks.labs.ucx.workspace_access.manager import PermissionManager
254+
from databricks.labs.ucx.workspace_access.tacl import TableAclSupport
255+
from databricks.labs.ucx.framework.crawlers import RuntimeBackend
256+
257+
sql_backend = RuntimeBackend()
258+
inventory_schema = cfg.inventory_database
259+
tables = TablesCrawler(sql_backend, inventory_schema)
260+
grants = GrantsCrawler(tables)
261+
tacl = TableAclSupport(grants, sql_backend)
262+
permission_manager = PermissionManager(sql_backend, inventory_schema, [tacl])
263+
permission_manager.inventorize_permissions()
247264
```

src/databricks/labs/ucx/framework/parallel.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ def _execute(self):
6969
with ThreadPoolExecutor(self._num_threads) as pool:
7070
futures = []
7171
for task in self._tasks:
72+
if task is None:
73+
continue
7274
future = pool.submit(self._wrap_result(task, self._name))
7375
future.add_done_callback(self._progress_report)
7476
futures.append(future)
@@ -98,7 +100,7 @@ def inner(*args, **kwargs):
98100
try:
99101
return func(*args, **kwargs), None
100102
except Exception as err:
101-
logger.error(f"{name} task failed: {err!s}")
103+
logger.error(f"{name} task failed: {err!s}", exc_info=err)
102104
return None, err
103105

104106
return inner

src/databricks/labs/ucx/workspace_access/manager.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import logging
23
import os
34
from collections.abc import Callable, Iterator
@@ -80,7 +81,7 @@ def apply_group_permissions(self, migration_state: GroupMigrationState, destinat
8081
if len(migration_state.groups) == 0:
8182
logger.info("No valid groups selected, nothing to do.")
8283
return True
83-
items = sorted(self._load_all(), key=lambda i: i.object_type)
84+
items = sorted(self.load_all(), key=lambda i: i.object_type)
8485
logger.info(
8586
f"Applying the permissions to {destination} groups. "
8687
f"Total groups to apply permissions: {len(migration_state.groups)}. "
@@ -104,6 +105,9 @@ def apply_group_permissions(self, migration_state: GroupMigrationState, destinat
104105
tasks_for_support = [
105106
relevant_support.get_apply_task(item, migration_state, destination) for item in items_subset
106107
]
108+
tasks_for_support = [_ for _ in tasks_for_support if _ is not None]
109+
if len(tasks_for_support) == 0:
110+
continue
107111
logger.info(f"Total tasks for {object_type}: {len(tasks_for_support)}")
108112
applier_tasks.extend(tasks_for_support)
109113

@@ -132,16 +136,23 @@ def cleanup(self):
132136
logger.info("Inventory table cleanup complete")
133137

134138
def _save(self, items: list[Permissions]):
139+
# keep in mind, that object_type and object_id are not primary keys.
135140
self._append_records(items) # TODO: update instead of append
136141
logger.info("Successfully saved the items to inventory table")
137142

138-
def _load_all(self) -> list[Permissions]:
143+
def load_all(self) -> list[Permissions]:
139144
logger.info(f"Loading inventory table {self._full_name}")
140145
return [
141146
Permissions(object_id, object_type, raw)
142147
for object_id, object_type, raw in self._fetch(f"SELECT object_id, object_type, raw FROM {self._full_name}")
143148
]
144149

150+
def load_all_for(self, object_type: str, object_id: str, klass: type) -> any:
151+
for perm in self.load_all():
152+
if object_type == perm.object_type and object_id == perm.object_id:
153+
raw = json.loads(perm.raw)
154+
yield klass(**raw)
155+
145156
def _get_crawler_tasks(self) -> Iterator[Callable[..., Permissions | None]]:
146157
for support in self._acl_support:
147158
yield from support.get_crawler_tasks()

tests/integration/workspace_access/test_groups.py

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
11
import logging
2+
from datetime import timedelta
23

34
from databricks.sdk import WorkspaceClient
5+
from databricks.sdk.retries import retried
46
from databricks.sdk.service.iam import PermissionLevel
57

68
from databricks.labs.ucx.config import GroupsConfig
79
from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler
10+
from databricks.labs.ucx.hive_metastore.grants import Grant
811
from databricks.labs.ucx.workspace_access.generic import (
9-
GenericPermissionsSupport, Listing,
12+
GenericPermissionsSupport,
13+
Listing,
1014
)
1115
from databricks.labs.ucx.workspace_access.groups import GroupManager
1216
from databricks.labs.ucx.workspace_access.manager import PermissionManager
17+
from databricks.labs.ucx.workspace_access.tacl import TableAclSupport
1318

1419
logger = logging.getLogger(__name__)
1520

@@ -89,47 +94,82 @@ def test_replace_workspace_groups_with_account_groups(
8994
generic_permissions = GenericPermissionsSupport(
9095
ws, [Listing(ws.cluster_policies.list, "policy_id", "cluster-policies")]
9196
)
92-
permission_manager = PermissionManager(sql_backend, inventory_schema, [generic_permissions])
9397
tables = TablesCrawler(sql_backend, inventory_schema)
9498
grants = GrantsCrawler(tables)
99+
tacl = TableAclSupport(grants, sql_backend)
100+
permission_manager = PermissionManager(sql_backend, inventory_schema, [generic_permissions, tacl])
95101

96102
permission_manager.inventorize_permissions()
97103

104+
dummy_grants = list(permission_manager.load_all_for("TABLE", dummy_table.full_name, Grant))
105+
assert 2 == len(dummy_grants)
106+
98107
table_permissions = grants.for_table_info(dummy_table)
99-
print(table_permissions)
108+
assert ws_group.display_name in table_permissions
109+
assert "SELECT" in table_permissions[ws_group.display_name]
100110

101111
permission_manager.apply_group_permissions(group_manager.migration_state, destination="backup")
102112

113+
@retried(on=[AssertionError], timeout=timedelta(seconds=30))
114+
def check_table_permissions_for_backup_group():
115+
table_permissions = grants.for_table_info(dummy_table)
116+
assert group_info.workspace.display_name in table_permissions
117+
assert group_info.backup.display_name in table_permissions
118+
assert "SELECT" in table_permissions[group_info.workspace.display_name]
119+
assert "SELECT" in table_permissions[group_info.backup.display_name]
120+
121+
check_table_permissions_for_backup_group()
122+
103123
policy_permissions = generic_permissions.load_as_dict("cluster-policies", cluster_policy.policy_id)
104124
assert PermissionLevel.CAN_USE == policy_permissions[group_info.workspace.display_name]
105125
assert PermissionLevel.CAN_USE == policy_permissions[group_info.backup.display_name]
106126

107127
group_manager.replace_workspace_groups_with_account_groups()
108128

109-
table_permissions = grants.for_table_info(dummy_table)
110-
print(table_permissions)
129+
@retried(on=[AssertionError], timeout=timedelta(seconds=30))
130+
def check_table_permissions_for_account_group():
131+
table_permissions = grants.for_table_info(dummy_table)
132+
assert group_info.account.display_name in table_permissions
133+
assert group_info.backup.display_name in table_permissions
134+
assert "SELECT" in table_permissions[group_info.backup.display_name]
135+
136+
check_table_permissions_for_account_group()
111137

112138
policy_permissions = generic_permissions.load_as_dict("cluster-policies", cluster_policy.policy_id)
113139
assert group_info.workspace.display_name not in policy_permissions
114140
assert PermissionLevel.CAN_USE == policy_permissions[group_info.backup.display_name]
115141

116142
permission_manager.apply_group_permissions(group_manager.migration_state, destination="account")
117143

118-
table_permissions = grants.for_table_info(dummy_table)
119-
print(table_permissions)
144+
@retried(on=[AssertionError], timeout=timedelta(seconds=30))
145+
def check_table_permissions_for_account_group():
146+
table_permissions = grants.for_table_info(dummy_table)
147+
assert group_info.account.display_name in table_permissions
148+
assert group_info.backup.display_name in table_permissions
149+
assert "SELECT" in table_permissions[group_info.backup.display_name]
150+
assert "SELECT" in table_permissions[group_info.account.display_name]
151+
152+
check_table_permissions_for_account_group()
120153

121154
policy_permissions = generic_permissions.load_as_dict("cluster-policies", cluster_policy.policy_id)
122155
assert PermissionLevel.CAN_USE == policy_permissions[group_info.account.display_name]
123156
assert PermissionLevel.CAN_USE == policy_permissions[group_info.backup.display_name]
124157

125-
# TODO: check hive grants as well
126-
127158
for _info in group_manager.migration_state.groups:
128159
ws.groups.delete(_info.backup.id)
129160

130161
policy_permissions = generic_permissions.load_as_dict("cluster-policies", cluster_policy.policy_id)
131162
assert group_info.backup.display_name not in policy_permissions
132163

164+
@retried(on=[AssertionError], timeout=timedelta(seconds=30))
165+
def check_table_permissions_after_backup_delete():
166+
table_permissions = grants.for_table_info(dummy_table)
167+
assert group_info.backup.display_name not in table_permissions
168+
assert group_info.account.display_name in table_permissions
169+
assert "SELECT" in table_permissions[group_info.account.display_name]
170+
171+
check_table_permissions_after_backup_delete()
172+
133173

134174
def test_group_listing(ws: WorkspaceClient, make_ucx_group):
135175
ws_group, acc_group = make_ucx_group()

tests/integration/workspace_access/test_permissions_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ def test_permissions_save_and_load(ws, sql_backend, inventory_schema, env_or_ski
1111
]
1212

1313
pi._save(saved)
14-
loaded = pi._load_all()
14+
loaded = pi.load_all()
1515

1616
assert saved == loaded
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import logging
2+
3+
from databricks.sdk.service.iam import PermissionLevel
4+
5+
from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler
6+
from databricks.labs.ucx.workspace_access.generic import (
7+
GenericPermissionsSupport,
8+
Listing,
9+
)
10+
from databricks.labs.ucx.workspace_access.manager import PermissionManager
11+
from databricks.labs.ucx.workspace_access.tacl import TableAclSupport
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
def test_recover_permissions_from_grants(
17+
ws,
18+
sql_backend,
19+
inventory_schema,
20+
make_ucx_group,
21+
make_group,
22+
make_acc_group,
23+
make_cluster_policy,
24+
make_cluster_policy_permissions,
25+
make_table,
26+
):
27+
ws_group, _ = make_ucx_group()
28+
cluster_policy = make_cluster_policy()
29+
make_cluster_policy_permissions(
30+
object_id=cluster_policy.policy_id,
31+
permission_level=PermissionLevel.CAN_USE,
32+
group_name=ws_group.display_name,
33+
)
34+
35+
dummy_table = make_table()
36+
sql_backend.execute(f"GRANT SELECT ON TABLE {dummy_table.full_name} TO `{ws_group.display_name}`")
37+
38+
generic_permissions = GenericPermissionsSupport(
39+
ws, [Listing(ws.cluster_policies.list, "policy_id", "cluster-policies")]
40+
)
41+
tables = TablesCrawler(sql_backend, inventory_schema)
42+
grants = GrantsCrawler(tables)
43+
tacl = TableAclSupport(grants, sql_backend)
44+
45+
# simulate: Table ACLs were not part of $inventory.permissions
46+
permission_manager = PermissionManager(sql_backend, inventory_schema, [generic_permissions])
47+
permission_manager.inventorize_permissions()
48+
49+
object_types = set()
50+
for perm in permission_manager.load_all():
51+
object_types.add(perm.object_type)
52+
assert {"cluster-policies"} == object_types
53+
54+
# simulate: recovery of Table ACLs as part of $inventory.permissions
55+
permission_manager = PermissionManager(sql_backend, inventory_schema, [tacl])
56+
permission_manager.inventorize_permissions()
57+
58+
# simulate: normal flow
59+
permission_manager = PermissionManager(sql_backend, inventory_schema, [generic_permissions, tacl])
60+
object_types = set()
61+
for perm in permission_manager.load_all():
62+
object_types.add(perm.object_type)
63+
assert {"CATALOG", "TABLE", "DATABASE", "cluster-policies"} == object_types

tests/unit/workspace_access/test_base.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88
Destination,
99
Permissions,
1010
)
11-
from databricks.labs.ucx.workspace_access.groups import (
12-
GroupMigrationState,
13-
)
11+
from databricks.labs.ucx.workspace_access.groups import GroupMigrationState
1412

1513

1614
def test_applier():

tests/unit/workspace_access/test_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def test_load_all():
6060
)
6161
pi = PermissionManager(b, "test_database", [])
6262

63-
output = pi._load_all()
63+
output = pi.load_all()
6464
assert output[0] == Permissions("object1", "clusters", "test acl")
6565

6666

tests/unit/workspace_access/test_tacl.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,7 @@
44

55
from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler
66
from databricks.labs.ucx.workspace_access.base import Permissions
7-
from databricks.labs.ucx.workspace_access.groups import (
8-
GroupMigrationState,
9-
MigrationGroupInfo,
10-
)
7+
from databricks.labs.ucx.workspace_access.groups import GroupMigrationState
118
from databricks.labs.ucx.workspace_access.tacl import TableAclSupport
129

1310
from ..framework.mocks import MockBackend
@@ -51,7 +48,9 @@ def test_tacl_applier(mocker):
5148
),
5249
)
5350
migration_state = GroupMigrationState()
54-
migration_state.add(Group(display_name="abc"), Group(display_name="tmp-backup-abc"), Group(display_name="account-abc"))
51+
migration_state.add(
52+
Group(display_name="abc"), Group(display_name="tmp-backup-abc"), Group(display_name="account-abc")
53+
)
5554
task = table_acl_support.get_apply_task(permissions, migration_state, "backup")
5655
task()
5756

@@ -77,9 +76,8 @@ def test_tacl_applier_no_target_principal(mocker):
7776
)
7877
migration_state = GroupMigrationState()
7978
migration_state.add(
80-
Group(display_name="abc"),
81-
Group(display_name="tmp-backup-abc"),
82-
Group(display_name="account-abc"))
79+
Group(display_name="abc"), Group(display_name="tmp-backup-abc"), Group(display_name="account-abc")
80+
)
8381
task = table_acl_support.get_apply_task(permissions, migration_state, "backup")
8482
assert task is None
8583

0 commit comments

Comments
 (0)