Skip to content

Commit a1014a4

Browse files
authored
Added Legacy Table ACL grants migration (#1054)
## Changes <!-- Summary of your changes that are easy to understand. Add screenshots when necessary --> ### Linked issues <!-- DOC: Link issue with a keyword: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved. See https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword --> Resolves #340 To be followed up with PRs for #887 #907 ### Functionality - [x] modified existing workflow: `migrate-tables` ### Tests <!-- How is this tested? Please see the checklist below and also describe any other relevant tests --> - [x] manually tested - [x] added unit tests - [x] added integration tests - [x] verified on staging environment (screenshot attached)
1 parent 6ab4ae2 commit a1014a4

File tree

7 files changed

+372
-74
lines changed

7 files changed

+372
-74
lines changed

src/databricks/labs/ucx/hive_metastore/mapping.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
import re
3+
from collections.abc import Collection
34
from dataclasses import dataclass
45
from functools import partial
56

@@ -118,7 +119,7 @@ def skip_schema(self, schema: str):
118119
except BadRequest as err:
119120
logger.error(err)
120121

121-
def get_tables_to_migrate(self, tables_crawler: TablesCrawler):
122+
def get_tables_to_migrate(self, tables_crawler: TablesCrawler) -> Collection[TableToMigrate]:
122123
rules = self.load()
123124
# Getting all the source tables from the rules
124125
databases_in_scope = self._get_databases_in_scope({rule.src_schema for rule in rules})

src/databricks/labs/ucx/hive_metastore/table_migrate.py

Lines changed: 75 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import dataclasses
12
import datetime
23
import logging
34
from collections import defaultdict
@@ -13,9 +14,17 @@
1314
from databricks.labs.ucx.config import WorkspaceConfig
1415
from databricks.labs.ucx.framework.crawlers import CrawlerBase
1516
from databricks.labs.ucx.framework.utils import escape_sql_identifier
16-
from databricks.labs.ucx.hive_metastore import TablesCrawler
17+
from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler
18+
from databricks.labs.ucx.hive_metastore.grants import Grant
1719
from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping
18-
from databricks.labs.ucx.hive_metastore.tables import MigrationCount, Table, What
20+
from databricks.labs.ucx.hive_metastore.tables import (
21+
AclMigrationWhat,
22+
MigrationCount,
23+
Table,
24+
What,
25+
)
26+
from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler
27+
from databricks.labs.ucx.workspace_access.groups import GroupManager, MigratedGroup
1928

2029
logger = logging.getLogger(__name__)
2130

@@ -33,16 +42,20 @@ class MigrationStatus:
3342
class TablesMigrate:
3443
def __init__(
3544
self,
36-
tables_crawler: TablesCrawler,
45+
table_crawler: TablesCrawler,
46+
grant_crawler: GrantsCrawler,
3747
ws: WorkspaceClient,
3848
backend: SqlBackend,
3949
table_mapping: TableMapping,
50+
group_manager: GroupManager,
4051
migration_status_refresher,
4152
):
42-
self._tc = tables_crawler
53+
self._tc = table_crawler
54+
self._gc = grant_crawler
4355
self._backend = backend
4456
self._ws = ws
4557
self._tm = table_mapping
58+
self._group = group_manager
4659
self._migration_status_refresher = migration_status_refresher
4760
self._seen_tables: dict[str, str] = {}
4861

@@ -52,33 +65,50 @@ def for_cli(cls, ws: WorkspaceClient, product='ucx'):
5265
config = installation.load(WorkspaceConfig)
5366
sql_backend = StatementExecutionBackend(ws, config.warehouse_id)
5467
table_crawler = TablesCrawler(sql_backend, config.inventory_database)
68+
udfs_crawler = UdfsCrawler(sql_backend, config.inventory_database)
69+
grants_crawler = GrantsCrawler(table_crawler, udfs_crawler)
5570
table_mapping = TableMapping(installation, ws, sql_backend)
71+
group_manager = GroupManager(sql_backend, ws, config.inventory_database)
5672
migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, config.inventory_database, table_crawler)
57-
return cls(table_crawler, ws, sql_backend, table_mapping, migration_status_refresher)
73+
return cls(
74+
table_crawler, grants_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher
75+
)
5876

59-
def migrate_tables(self, *, what: What | None = None):
77+
def migrate_tables(self, *, what: What | None = None, acl_strategy: AclMigrationWhat | None = None):
6078
self._init_seen_tables()
6179
tables_to_migrate = self._tm.get_tables_to_migrate(self._tc)
80+
if acl_strategy is not None:
81+
grants_to_migrate = self._gc.snapshot()
82+
migrated_groups = self._group.snapshot()
6283
tasks = []
6384
for table in tables_to_migrate:
64-
if not what or table.src.what == what:
65-
tasks.append(partial(self._migrate_table, table.src, table.rule))
85+
if what is not None and table.src.what != what:
86+
continue
87+
match acl_strategy:
88+
case None:
89+
tasks.append(partial(self._migrate_table, table.src, table.rule))
90+
case AclMigrationWhat.LEGACY_TACL:
91+
grants = self._match_grants(table.src, grants_to_migrate, migrated_groups)
92+
tasks.append(partial(self._migrate_table, table.src, table.rule, grants))
93+
case AclMigrationWhat.PRINCIPAL:
94+
# TODO: Implement principal-based ACL migration
95+
pass
6696
Threads.strict("migrate tables", tasks)
6797

68-
def _migrate_table(self, src_table: Table, rule: Rule):
98+
def _migrate_table(self, src_table: Table, rule: Rule, grants: list[Grant] | None = None):
6999
if self._table_already_upgraded(rule.as_uc_table_key):
70100
logger.info(f"Table {src_table.key} already upgraded to {rule.as_uc_table_key}")
71101
return True
72102
if src_table.what == What.DBFS_ROOT_DELTA:
73-
return self._migrate_dbfs_root_table(src_table, rule)
103+
return self._migrate_dbfs_root_table(src_table, rule, grants)
74104
if src_table.what == What.EXTERNAL_SYNC:
75-
return self._migrate_external_table(src_table, rule)
105+
return self._migrate_external_table(src_table, rule, grants)
76106
if src_table.what == What.VIEW:
77-
return self._migrate_view(src_table, rule)
107+
return self._migrate_view(src_table, rule, grants)
78108
logger.info(f"Table {src_table.key} is not supported for migration")
79109
return True
80110

81-
def _migrate_external_table(self, src_table: Table, rule: Rule):
111+
def _migrate_external_table(self, src_table: Table, rule: Rule, grants: list[Grant] | None = None):
82112
target_table_key = rule.as_uc_table_key
83113
table_migrate_sql = src_table.sql_migrate_external(target_table_key)
84114
logger.debug(f"Migrating external table {src_table.key} to using SQL query: {table_migrate_sql}")
@@ -90,24 +120,36 @@ def _migrate_external_table(self, src_table: Table, rule: Rule):
90120
)
91121
return False
92122
self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key, self._ws.get_workspace_id()))
93-
return True
123+
return self._migrate_acl(src_table, rule, grants)
94124

95-
def _migrate_dbfs_root_table(self, src_table: Table, rule: Rule):
125+
def _migrate_dbfs_root_table(self, src_table: Table, rule: Rule, grants: list[Grant] | None = None):
96126
target_table_key = rule.as_uc_table_key
97127
table_migrate_sql = src_table.sql_migrate_dbfs(target_table_key)
98128
logger.debug(f"Migrating managed table {src_table.key} to using SQL query: {table_migrate_sql}")
99129
self._backend.execute(table_migrate_sql)
100130
self._backend.execute(src_table.sql_alter_to(rule.as_uc_table_key))
101131
self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key, self._ws.get_workspace_id()))
102-
return True
132+
return self._migrate_acl(src_table, rule, grants)
103133

104-
def _migrate_view(self, src_table: Table, rule: Rule):
134+
def _migrate_view(self, src_table: Table, rule: Rule, grants: list[Grant] | None = None):
105135
target_table_key = rule.as_uc_table_key
106136
table_migrate_sql = src_table.sql_migrate_view(target_table_key)
107137
logger.debug(f"Migrating view {src_table.key} to using SQL query: {table_migrate_sql}")
108138
self._backend.execute(table_migrate_sql)
109139
self._backend.execute(src_table.sql_alter_to(rule.as_uc_table_key))
110140
self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key, self._ws.get_workspace_id()))
141+
return self._migrate_acl(src_table, rule, grants)
142+
143+
def _migrate_acl(self, src: Table, rule: Rule, grants: list[Grant] | None):
144+
if grants is None:
145+
return True
146+
for grant in grants:
147+
acl_migrate_sql = grant.uc_grant_sql(src.kind, rule.as_uc_table_key)
148+
if acl_migrate_sql is None:
149+
logger.warning(f"Cannot identify UC grant for {src.kind} {rule.as_uc_table_key}. Skipping.")
150+
continue
151+
logger.debug(f"Migrating acls on {rule.as_uc_table_key} using SQL query: {acl_migrate_sql}")
152+
self._backend.execute(acl_migrate_sql)
111153
return True
112154

113155
def _table_already_upgraded(self, target) -> bool:
@@ -192,7 +234,7 @@ def print_revert_report(self, *, delete_managed: bool) -> bool | None:
192234
table_sub_header = " |"
193235
for what in list(What):
194236
if len(what.name.split("_")) - 1 < header:
195-
table_sub_header += f"{' '*12}|"
237+
table_sub_header += f"{' ' * 12}|"
196238
continue
197239
table_sub_header += f" {what.name.split('_')[header]:<10} |"
198240
print(table_sub_header)
@@ -216,6 +258,20 @@ def print_revert_report(self, *, delete_managed: bool) -> bool | None:
216258
def _init_seen_tables(self):
217259
self._seen_tables = self._migration_status_refresher.get_seen_tables()
218260

261+
@staticmethod
262+
def _match_grants(table: Table, grants: Iterable[Grant], migrated_groups: list[MigratedGroup]) -> list[Grant]:
263+
matched_grants = []
264+
for grant in grants:
265+
if grant.database != table.database:
266+
continue
267+
if table.name not in (grant.table, grant.view):
268+
continue
269+
matched_group = [g.name_in_account for g in migrated_groups if g.name_in_workspace == grant.principal]
270+
if len(matched_group) > 0:
271+
grant = dataclasses.replace(grant, principal=matched_group[0])
272+
matched_grants.append(grant)
273+
return matched_grants
274+
219275

220276
class MigrationStatusRefresher(CrawlerBase[MigrationStatus]):
221277
def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema, table_crawler: TablesCrawler):
@@ -241,7 +297,7 @@ def get_seen_tables(self) -> dict[str, str]:
241297
return seen_tables
242298

243299
def is_upgraded(self, schema: str, table: str) -> bool:
244-
result = self._backend.fetch(f"SHOW TBLPROPERTIES {escape_sql_identifier(schema+'.'+table)}")
300+
result = self._backend.fetch(f"SHOW TBLPROPERTIES {escape_sql_identifier(schema + '.' + table)}")
245301
for value in result:
246302
if value["key"] == "upgraded_to":
247303
logger.info(f"{schema}.{table} is set as upgraded")

src/databricks/labs/ucx/hive_metastore/tables.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ class What(Enum):
2525
UNKNOWN = auto()
2626

2727

28+
class AclMigrationWhat(Enum):
29+
LEGACY_TACL = auto()
30+
PRINCIPAL = auto()
31+
32+
2833
@dataclass
2934
class Table:
3035
catalog: str

src/databricks/labs/ucx/runtime.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -431,11 +431,14 @@ def migrate_external_tables_sync(
431431
- For AWS: TBD
432432
"""
433433
table_crawler = TablesCrawler(sql_backend, cfg.inventory_database)
434+
udf_crawler = UdfsCrawler(sql_backend, cfg.inventory_database)
435+
grant_crawler = GrantsCrawler(table_crawler, udf_crawler)
434436
table_mapping = TableMapping(install, ws, sql_backend)
435437
migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, cfg.inventory_database, table_crawler)
436-
TablesMigrate(table_crawler, ws, sql_backend, table_mapping, migration_status_refresher).migrate_tables(
437-
what=What.EXTERNAL_SYNC
438-
)
438+
group_manager = GroupManager(sql_backend, ws, cfg.inventory_database)
439+
TablesMigrate(
440+
table_crawler, grant_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher
441+
).migrate_tables(what=What.EXTERNAL_SYNC)
439442

440443

441444
@task("migrate-tables", job_cluster="table_migration")
@@ -448,11 +451,14 @@ def migrate_dbfs_root_delta_tables(
448451
- For AWS: TBD
449452
"""
450453
table_crawler = TablesCrawler(sql_backend, cfg.inventory_database)
454+
udf_crawler = UdfsCrawler(sql_backend, cfg.inventory_database)
455+
grant_crawler = GrantsCrawler(table_crawler, udf_crawler)
451456
table_mapping = TableMapping(install, ws, sql_backend)
452457
migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, cfg.inventory_database, table_crawler)
453-
TablesMigrate(table_crawler, ws, sql_backend, table_mapping, migration_status_refresher).migrate_tables(
454-
what=What.DBFS_ROOT_DELTA
455-
)
458+
group_manager = GroupManager(sql_backend, ws, cfg.inventory_database)
459+
TablesMigrate(
460+
table_crawler, grant_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher
461+
).migrate_tables(what=What.DBFS_ROOT_DELTA)
456462

457463

458464
def main(*argv):

tests/integration/conftest.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
AzureServicePrincipalCrawler,
1616
AzureServicePrincipalInfo,
1717
)
18-
from databricks.labs.ucx.hive_metastore import TablesCrawler
18+
from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler
19+
from databricks.labs.ucx.hive_metastore.grants import Grant
1920
from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping
2021
from databricks.labs.ucx.hive_metastore.tables import Table
2122
from databricks.labs.ucx.hive_metastore.udfs import Udf, UdfsCrawler
@@ -147,6 +148,28 @@ def snapshot(self) -> list[Udf]:
147148
return self._udfs
148149

149150

151+
class StaticGrantsCrawler(GrantsCrawler):
152+
def __init__(self, tc: TablesCrawler, udf: UdfsCrawler, grants: list[Grant]):
153+
super().__init__(tc, udf)
154+
self._grants = [
155+
Grant(
156+
principal=_.principal,
157+
action_type=_.action_type,
158+
catalog=_.catalog,
159+
database=_.database,
160+
table=_.table,
161+
view=_.view,
162+
udf=_.udf,
163+
any_file=_.any_file,
164+
anonymous_function=_.anonymous_function,
165+
)
166+
for _ in grants
167+
]
168+
169+
def snapshot(self) -> list[Grant]:
170+
return self._grants
171+
172+
150173
class StaticTableMapping(TableMapping):
151174
def __init__(self, workspace_client: WorkspaceClient, sb: SqlBackend, rules: list[Rule]):
152175
installation = Installation(workspace_client, 'ucx')

0 commit comments

Comments
 (0)