Skip to content

Commit fe06262

Browse files
authored
Let create-catalogs-schemas reuse MigrateGrants so that it applies group renaming (#2955)
## Changes Let `create-catalogs-schemas` reuse `MigrateGrants` so that it applies group renaming and eliminate duplicate code. ### Linked issues Resolves #2934 Resolves #2932 ### Functionality - [x] modified existing command: `databricks labs ucx create-catalogs-schemas` - [x] modified existing workflow: `migrate-tables` (*functionally no change) ### Tests - [ ] manually tested - [x] added unit tests - [x] added integration tests
1 parent 2eff82b commit fe06262

File tree

13 files changed

+748
-421
lines changed

13 files changed

+748
-421
lines changed

src/databricks/labs/ucx/contexts/application.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -373,14 +373,7 @@ def table_mapping(self) -> TableMapping:
373373

374374
@cached_property
375375
def catalog_schema(self) -> CatalogSchema:
376-
return CatalogSchema(
377-
self.workspace_client,
378-
self.table_mapping,
379-
self.principal_acl,
380-
self.sql_backend,
381-
self.grants_crawler,
382-
self.config.ucx_catalog,
383-
)
376+
return CatalogSchema(self.workspace_client, self.table_mapping, self.migrate_grants, self.config.ucx_catalog)
384377

385378
@cached_property
386379
def verify_timeout(self) -> timedelta:

src/databricks/labs/ucx/hive_metastore/catalog_schema.py

Lines changed: 222 additions & 179 deletions
Large diffs are not rendered by default.

src/databricks/labs/ucx/hive_metastore/grants.py

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from collections.abc import Callable, Iterable
44
from dataclasses import dataclass, replace
55
from functools import partial, cached_property
6+
from typing import Protocol
67

78
from databricks.labs.blueprint.installation import Installation
89
from databricks.labs.blueprint.parallel import ManyError, Threads
@@ -109,6 +110,15 @@ def object_key(self) -> str:
109110
_, key = self.this_type_and_key()
110111
return key.lower()
111112

113+
@property
114+
def order(self) -> int:
115+
"""Order of the grants to be upheld when applying."""
116+
match self.action_type:
117+
case "OWN": # Apply ownership as last to avoid losing permissions for applying grants
118+
return 1
119+
case _:
120+
return 0
121+
112122
def this_type_and_key(self):
113123
return self.type_and_key(
114124
catalog=self.catalog,
@@ -598,17 +608,22 @@ def __init__(
598608
self._external_locations = external_locations
599609
self._compute_locations = cluster_locations
600610

601-
def get_interactive_cluster_grants(self) -> list[Grant]:
611+
def get_interactive_cluster_grants(self) -> set[Grant]:
602612
tables = list(self._tables_crawler.snapshot())
603-
grants: set[Grant] = set()
613+
grants = set[Grant]()
604614

605-
for compute_location in self._compute_locations():
615+
try:
616+
compute_locations = self._compute_locations()
617+
except DatabricksError as e:
618+
logger.warning("No compute locations found.", exc_info=e)
619+
return grants
620+
for compute_location in compute_locations:
606621
principals = self._get_cluster_principal_mapping(compute_location.compute_id, compute_location.compute_type)
607622
if len(principals) == 0:
608623
continue
609624
cluster_usage = self._get_grants(compute_location.locations, principals, tables)
610625
grants.update(cluster_usage)
611-
return list(grants)
626+
return grants
612627

613628
def _get_privilege(self, table: Table, locations: dict[str, str]) -> str | None:
614629
if table.view_text is not None:
@@ -726,6 +741,27 @@ def _get_location_name(self, location_url: str):
726741
return None
727742

728743

744+
class SecurableObject(Protocol):
745+
"""A protocol for a securable object.
746+
747+
Docs:
748+
https://docs.databricks.com/en/data-governance/table-acls/object-privileges.html#securable-objects-in-the-hive-metastore
749+
"""
750+
751+
@property
752+
def kind(self) -> str:
753+
"""The type of securable objects, see doc referenced above."""
754+
755+
@property
756+
def full_name(self) -> str:
757+
"""The object name often a synonym for `key`"""
758+
759+
@property
760+
def key(self) -> str:
761+
"""The object identifier often a synonym for `full_name`"""
762+
return self.full_name
763+
764+
729765
class MigrateGrants:
730766
def __init__(
731767
self,
@@ -737,20 +773,22 @@ def __init__(
737773
self._group_manager = group_manager
738774
self._grant_loaders = grant_loaders
739775

740-
def apply(self, src: Table, uc_table_key: str) -> bool:
776+
def apply(self, src: SecurableObject, dst: SecurableObject) -> bool:
741777
for grant in self._match_grants(src):
742-
acl_migrate_sql = grant.uc_grant_sql(src.kind, uc_table_key)
778+
acl_migrate_sql = grant.uc_grant_sql(dst.kind, dst.full_name)
743779
if acl_migrate_sql is None:
744780
logger.warning(
745781
f"failed-to-migrate: Hive metastore grant '{grant.action_type}' cannot be mapped to UC grant for "
746-
f"{src.kind} '{uc_table_key}'. Skipping."
782+
f"{dst.kind} '{dst.full_name}'. Skipping."
747783
)
748784
continue
749-
logger.debug(f"Migrating acls on {uc_table_key} using SQL query: {acl_migrate_sql}")
785+
logger.debug(f"Migrating acls on {dst.full_name} using SQL query: {acl_migrate_sql}")
750786
try:
751787
self._sql_backend.execute(acl_migrate_sql)
752788
except DatabricksError as e:
753-
logger.warning(f"failed-to-migrate: Failed to migrate ACL for {src.key} to {uc_table_key}: {e}")
789+
logger.warning(
790+
f"failed-to-migrate: Failed to migrate ACL for {src.full_name} to {dst.full_name}", exc_info=e
791+
)
754792
return True
755793

756794
@cached_property
@@ -765,16 +803,14 @@ def _grants(self) -> list[Grant]:
765803
grants.append(grant)
766804
return grants
767805

768-
def _match_grants(self, table: Table) -> list[Grant]:
806+
def _match_grants(self, src: SecurableObject) -> list[Grant]:
769807
matched_grants = []
770808
for grant in self._grants:
771-
if grant.database != table.database:
772-
continue
773-
if table.name not in (grant.table, grant.view):
809+
if grant.object_key != src.key:
774810
continue
775811
grant = self._replace_account_group(grant)
776812
matched_grants.append(grant)
777-
return matched_grants
813+
return sorted(matched_grants, key=lambda g: g.order)
778814

779815
def _replace_account_group(self, grant: Grant) -> Grant:
780816
target_principal = self._workspace_to_account_group_names.get(grant.principal)

src/databricks/labs/ucx/hive_metastore/mapping.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,16 @@ def from_src_dst(cls, src_table: TableInfo, dst_schema: SchemaInfo) -> "Rule":
5858
def match(self, table: TableIdentifier) -> bool:
5959
return table.catalog == "hive_metastore" and self.src_schema == table.schema and self.src_table == table.table
6060

61+
@property
62+
def as_uc_table(self) -> Table:
63+
return Table(
64+
self.catalog_name,
65+
self.dst_schema,
66+
self.dst_table,
67+
object_type="UNKNOWN",
68+
table_format="UNKNOWN",
69+
)
70+
6171
@property
6272
def as_uc_table_key(self) -> str:
6373
return f"{self.catalog_name}.{self.dst_schema}.{self.dst_table}"

src/databricks/labs/ucx/hive_metastore/table_migrate.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ def _migrate_view_table(self, src_view: ViewToMigrate):
233233
except DatabricksError as e:
234234
logger.warning(f"Failed to migrate view {src_view.src.key} to {src_view.rule.as_uc_table_key}: {e}")
235235
return False
236-
return self._migrate_grants.apply(src_view.src, src_view.rule.as_uc_table_key)
236+
return self._migrate_grants.apply(src_view.src, src_view.rule.as_uc_table)
237237

238238
def _sql_migrate_view(self, src_view: ViewToMigrate) -> str:
239239
# We have to fetch create statement this way because of columns in:
@@ -309,7 +309,7 @@ def _migrate_managed_as_external_table(self, src_table: Table, rule: Rule):
309309
)
310310
return False
311311
self._backend.execute(self._sql_alter_from(src_table, rule.as_uc_table_key, self._ws.get_workspace_id()))
312-
return self._migrate_grants.apply(src_table, rule.as_uc_table_key)
312+
return self._migrate_grants.apply(src_table, rule.as_uc_table)
313313

314314
def _migrate_external_table(self, src_table: Table, rule: Rule):
315315
target_table_key = rule.as_uc_table_key
@@ -324,7 +324,7 @@ def _migrate_external_table(self, src_table: Table, rule: Rule):
324324
)
325325
return False
326326
self._backend.execute(self._sql_alter_from(src_table, rule.as_uc_table_key, self._ws.get_workspace_id()))
327-
return self._migrate_grants.apply(src_table, rule.as_uc_table_key)
327+
return self._migrate_grants.apply(src_table, rule.as_uc_table)
328328

329329
def _migrate_external_table_hiveserde_in_place(self, src_table: Table, rule: Rule):
330330
# verify hive serde type
@@ -362,7 +362,7 @@ def _migrate_external_table_hiveserde_in_place(self, src_table: Table, rule: Rul
362362
except DatabricksError as e:
363363
logger.warning(f"failed-to-migrate: Failed to migrate table {src_table.key} to {rule.as_uc_table_key}: {e}")
364364
return False
365-
return self._migrate_grants.apply(src_table, rule.as_uc_table_key)
365+
return self._migrate_grants.apply(src_table, rule.as_uc_table)
366366

367367
def _migrate_dbfs_root_table(self, src_table: Table, rule: Rule):
368368
target_table_key = rule.as_uc_table_key
@@ -378,7 +378,7 @@ def _migrate_dbfs_root_table(self, src_table: Table, rule: Rule):
378378
except DatabricksError as e:
379379
logger.warning(f"failed-to-migrate: Failed to migrate table {src_table.key} to {rule.as_uc_table_key}: {e}")
380380
return False
381-
return self._migrate_grants.apply(src_table, rule.as_uc_table_key)
381+
return self._migrate_grants.apply(src_table, rule.as_uc_table)
382382

383383
def _migrate_table_create_ctas(self, src_table: Table, rule: Rule):
384384
if src_table.what not in [What.EXTERNAL_NO_SYNC, What.EXTERNAL_HIVESERDE]:
@@ -402,7 +402,7 @@ def _migrate_table_create_ctas(self, src_table: Table, rule: Rule):
402402
except DatabricksError as e:
403403
logger.warning(f"failed-to-migrate: Failed to migrate table {src_table.key} to {rule.as_uc_table_key}: {e}")
404404
return False
405-
return self._migrate_grants.apply(src_table, rule.as_uc_table_key)
405+
return self._migrate_grants.apply(src_table, rule.as_uc_table)
406406

407407
def _migrate_table_in_mount(self, src_table: Table, rule: Rule):
408408
target_table_key = rule.as_uc_table_key
@@ -417,7 +417,7 @@ def _migrate_table_in_mount(self, src_table: Table, rule: Rule):
417417
except DatabricksError as e:
418418
logger.warning(f"failed-to-migrate: Failed to migrate table {src_table.key} to {rule.as_uc_table_key}: {e}")
419419
return False
420-
return self._migrate_grants.apply(src_table, rule.as_uc_table_key)
420+
return self._migrate_grants.apply(src_table, rule.as_uc_table)
421421

422422
def _table_already_migrated(self, target) -> bool:
423423
return target in self._seen_tables

src/databricks/labs/ucx/hive_metastore/tables.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def is_hive(self) -> bool:
9999

100100
@property
101101
def key(self) -> str:
102+
# TODO: https://github.com/databrickslabs/ucx/issues/2979
102103
if self.is_table_in_mount:
103104
return f"{self.catalog}.{self.database}.{self.location}".lower()
104105
return f"{self.catalog}.{self.database}.{self.name}".lower()

0 commit comments

Comments
 (0)