Skip to content

Commit 80e2217

Browse files
nfxwilliam-conti
andauthored
Use ws.groups.patch to rename groups instead of creating backup groups (#450)
we can use patch to rename groups, so that we can simplify the local group migration step. <img width="646" alt="image" src="https://github.com/databrickslabs/ucx/assets/259697/b0f54013-3366-4fd2-a1a4-236c7f033f6b"> <img width="325" alt="image" src="https://github.com/databrickslabs/ucx/assets/259697/3b2ea9e1-5b4c-4b34-94b4-8e25267fd5ef"> Closes #218 --------- Co-authored-by: William Conti <[email protected]> Co-authored-by: william-conti <[email protected]>
1 parent 536dfae commit 80e2217

30 files changed

+1136
-1520
lines changed

src/databricks/labs/ucx/config.py

Lines changed: 24 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,7 @@
99

1010
from databricks.labs.ucx.__about__ import __version__
1111

12-
__all__ = ["GroupsConfig", "AccountConfig", "WorkspaceConfig"]
13-
14-
15-
# TODO: flatten down
16-
@dataclass
17-
class GroupsConfig:
18-
selected: list[str] | None = None
19-
auto: bool | None = None
20-
backup_group_prefix: str | None = "db-temp-"
21-
22-
def __post_init__(self):
23-
if not self.selected and self.auto is None:
24-
msg = "Either selected or auto must be set"
25-
raise ValueError(msg)
26-
if self.selected and self.auto is False:
27-
msg = "No selected groups provided, but auto-collection is disabled"
28-
raise ValueError(msg)
29-
30-
@classmethod
31-
def from_dict(cls, raw: dict):
32-
return cls(**raw)
12+
__all__ = ["AccountConfig", "WorkspaceConfig"]
3313

3414

3515
@dataclass
@@ -97,7 +77,8 @@ def from_dict(cls, raw: dict):
9777

9878

9979
# Used to set the right expectation about configuration file schema
100-
_CONFIG_VERSION = 1
80+
# TODO: config versions migrate
81+
_CONFIG_VERSION = 2
10182

10283
T = TypeVar("T")
10384

@@ -121,16 +102,6 @@ def from_bytes(cls, raw: str) -> T:
121102
def from_file(cls, config_file: Path) -> T:
122103
return cls.from_bytes(config_file.read_text())
123104

124-
@classmethod
125-
def _verify_version(cls, raw):
126-
stored_version = raw.pop("version", None)
127-
if stored_version != _CONFIG_VERSION:
128-
msg = (
129-
f"Unsupported config version: {stored_version}. "
130-
f"UCX v{__version__} expects config version to be {_CONFIG_VERSION}"
131-
)
132-
raise ValueError(msg)
133-
134105
def __post_init__(self):
135106
if self.connect is None:
136107
self.connect = ConnectConfig()
@@ -199,14 +170,19 @@ def to_account_client(self) -> AccountClient:
199170
@dataclass
200171
class WorkspaceConfig(_Config["WorkspaceConfig"]):
201172
inventory_database: str
202-
groups: GroupsConfig
173+
174+
# Includes group names for migration. If not specified, all matching groups will be picked up
175+
include_group_names: list[str] = None
176+
177+
renamed_group_prefix: str = "ucx-renamed-"
178+
203179
instance_pool_id: str = None
204180
warehouse_id: str = None
205181
connect: ConnectConfig | None = None
206182
num_threads: int | None = 10
207-
log_level: str | None = "INFO"
208183
database_to_catalog_mapping: dict[str, str] = None
209184
default_catalog: str = "ucx_default"
185+
log_level: str = "INFO"
210186

211187
# Starting path for notebooks and directories crawler
212188
workspace_start_path: str = "/"
@@ -215,24 +191,23 @@ class WorkspaceConfig(_Config["WorkspaceConfig"]):
215191

216192
@classmethod
217193
def from_dict(cls, raw: dict):
218-
cls._verify_version(raw)
219-
return cls(
220-
inventory_database=raw.get("inventory_database"),
221-
groups=GroupsConfig.from_dict(raw.get("groups", {})),
222-
connect=ConnectConfig.from_dict(raw.get("connect", {})),
223-
instance_pool_id=raw.get("instance_pool_id", None),
224-
warehouse_id=raw.get("warehouse_id", None),
225-
num_threads=raw.get("num_threads", 10),
226-
log_level=raw.get("log_level", "INFO"),
227-
database_to_catalog_mapping=raw.get("database_to_catalog_mapping", None),
228-
instance_profile=raw.get("instance_profile", None),
229-
spark_conf=raw.get("spark_conf", None),
230-
default_catalog=raw.get("default_catalog", "main"),
231-
workspace_start_path=raw.get("workspace_start_path", "/"),
232-
)
194+
raw = cls._migrate_from_v1(raw)
195+
return cls(**raw)
233196

234197
def to_workspace_client(self) -> WorkspaceClient:
235198
return WorkspaceClient(config=self.to_databricks_config())
236199

237200
def replace_inventory_variable(self, text: str) -> str:
238201
return text.replace("$inventory", f"hive_metastore.{self.inventory_database}")
202+
203+
@classmethod
204+
def _migrate_from_v1(cls, raw: dict):
205+
stored_version = raw.pop("version", None)
206+
if stored_version == _CONFIG_VERSION:
207+
return raw
208+
if stored_version == 1:
209+
raw["include_group_names"] = raw.get("groups", {"selected": []})["selected"]
210+
raw["renamed_group_prefix"] = raw.get("groups", {"backup_group_prefix": "db-temp-"})["backup_group_prefix"]
211+
return raw
212+
msg = f"Unknown config version: {stored_version}"
213+
raise ValueError(msg)

src/databricks/labs/ucx/install.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
JobInfo,
3131
PipelineInfo,
3232
)
33-
from databricks.labs.ucx.config import GroupsConfig, WorkspaceConfig
33+
from databricks.labs.ucx.config import WorkspaceConfig
3434
from databricks.labs.ucx.framework.crawlers import (
3535
SchemaDeployer,
3636
SqlBackend,
@@ -379,10 +379,11 @@ def warehouse_type(_):
379379
groups_config_args = {
380380
"backup_group_prefix": backup_group_prefix,
381381
}
382+
382383
if selected_groups != "<ALL>":
383384
groups_config_args["selected"] = [x.strip() for x in selected_groups.split(",")]
384385
else:
385-
groups_config_args["auto"] = True
386+
groups_config_args["selected"] = None
386387

387388
# Checking for external HMS
388389
instance_profile = None
@@ -408,7 +409,8 @@ def warehouse_type(_):
408409

409410
self._config = WorkspaceConfig(
410411
inventory_database=inventory_database,
411-
groups=GroupsConfig(**groups_config_args),
412+
include_group_names=groups_config_args["selected"],
413+
renamed_group_prefix=groups_config_args["backup_group_prefix"],
412414
warehouse_id=warehouse_id,
413415
log_level=log_level,
414416
num_threads=num_threads,
@@ -718,7 +720,7 @@ def _job_clusters(self, names: set[str]):
718720
num_workers=0,
719721
)
720722
)
721-
if self._ws.config.is_aws:
723+
if self._ws.config.is_aws and spec.aws_attributes is not None:
722724
aws_attributes = replace(spec.aws_attributes, instance_profile_arn=self._config.instance_profile)
723725
spec = replace(spec, aws_attributes=aws_attributes)
724726
if "main" in names:

src/databricks/labs/ucx/runtime.py

Lines changed: 37 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,7 @@
1818
from databricks.labs.ucx.hive_metastore.data_objects import ExternalLocationCrawler
1919
from databricks.labs.ucx.hive_metastore.mounts import Mounts
2020
from databricks.labs.ucx.workspace_access.generic import WorkspaceListing
21-
from databricks.labs.ucx.workspace_access.groups import (
22-
GroupManager,
23-
GroupMigrationState,
24-
)
21+
from databricks.labs.ucx.workspace_access.groups import GroupManager
2522
from databricks.labs.ucx.workspace_access.manager import PermissionManager
2623

2724
logger = logging.getLogger(__name__)
@@ -194,10 +191,22 @@ def crawl_permissions(cfg: WorkspaceConfig):
194191
permission_manager.inventorize_permissions()
195192

196193

194+
@task("assessment")
195+
def crawl_groups(cfg: WorkspaceConfig):
196+
"""Scans all groups for the local group migration scope"""
197+
sql_backend = RuntimeBackend()
198+
ws = WorkspaceClient(config=cfg.to_databricks_config())
199+
group_manager = GroupManager(
200+
sql_backend, ws, cfg.inventory_database, cfg.include_group_names, cfg.renamed_group_prefix
201+
)
202+
group_manager.snapshot()
203+
204+
197205
@task(
198206
"assessment",
199207
depends_on=[
200208
crawl_grants,
209+
crawl_groups,
201210
crawl_permissions,
202211
guess_external_locations,
203212
assess_jobs,
@@ -214,55 +223,29 @@ def assessment_report(_: WorkspaceConfig):
214223
dashboard _before_ all tasks have been completed, but then only already completed information is shown."""
215224

216225

217-
@task("002-apply-permissions-to-backup-groups", depends_on=[crawl_permissions], job_cluster="tacl")
218-
def apply_permissions_to_backup_groups(cfg: WorkspaceConfig):
219-
"""Second phase of the workspace-local group migration process. It does the following:
220-
- Creates a backup of every workspace-local group, adding a prefix that can be set in the configuration
221-
- Assigns the full set of permissions of the original group to the backup one
222-
223-
It covers local workspace-local permissions for all entities: Legacy Table ACLs, Entitlements,
224-
AWS instance profiles, Clusters, Cluster policies, Instance Pools, Databricks SQL warehouses, Delta Live
225-
Tables, Jobs, MLflow experiments, MLflow registry, SQL Dashboards & Queries, SQL Alerts, Token and Password usage
226-
permissions, Secret Scopes, Notebooks, Directories, Repos, Files.
227-
228-
See [interactive tutorial here](https://app.getreprise.com/launch/myM3VNn/)."""
226+
@task("migrate-groups", depends_on=[crawl_groups])
227+
def rename_workspace_local_groups(cfg: WorkspaceConfig):
228+
"""Renames workspace local groups by adding `ucx-renamed-` prefix."""
229+
sql_backend = RuntimeBackend()
229230
ws = WorkspaceClient(config=cfg.to_databricks_config())
230-
group_manager = GroupManager(ws, cfg.groups)
231-
group_manager.prepare_groups_in_environment()
232-
if not group_manager.has_groups():
233-
logger.info("Skipping group migration as no groups were found.")
234-
return
235-
236-
backend = RuntimeBackend()
237-
permission_manager = PermissionManager.factory(
238-
ws,
239-
backend,
240-
cfg.inventory_database,
241-
num_threads=cfg.num_threads,
242-
workspace_start_path=cfg.workspace_start_path,
231+
group_manager = GroupManager(
232+
sql_backend, ws, cfg.inventory_database, cfg.include_group_names, cfg.renamed_group_prefix
243233
)
244-
permission_manager.apply_group_permissions(group_manager.migration_state, destination="backup")
245-
group_manager.migration_state.persist_migration_state(backend, cfg.inventory_database)
234+
group_manager.rename_groups()
246235

247236

248-
@task("003-replace-workspace-local-with-account-groups", depends_on=[apply_permissions_to_backup_groups])
249-
def replace_workspace_groups_with_account_groups(cfg: WorkspaceConfig):
250-
"""Third phase of the workspace-local group migration process. It does the following:
251-
- Creates an account-level group with the original name of the workspace-local one"""
237+
@task("migrate-groups", depends_on=[rename_workspace_local_groups])
238+
def reflect_account_groups_on_workspace(cfg: WorkspaceConfig):
239+
"""Adds matching account groups to this workspace."""
240+
sql_backend = RuntimeBackend()
252241
ws = WorkspaceClient(config=cfg.to_databricks_config())
253-
group_manager = GroupManager(ws, cfg.groups)
254-
remote_state = GroupMigrationState().fetch_migration_state(RuntimeBackend(), cfg.inventory_database)
255-
if len(remote_state.groups) == 0:
256-
logger.info("Skipping group migration as no groups were found.")
257-
return
258-
group_manager.replace_workspace_groups_with_account_groups(remote_state)
242+
group_manager = GroupManager(
243+
sql_backend, ws, cfg.inventory_database, cfg.include_group_names, cfg.renamed_group_prefix
244+
)
245+
group_manager.reflect_account_groups_on_workspace()
259246

260247

261-
@task(
262-
"004-apply-permissions-to-account-groups",
263-
depends_on=[replace_workspace_groups_with_account_groups],
264-
job_cluster="tacl",
265-
)
248+
@task("migrate-groups", depends_on=[reflect_account_groups_on_workspace], job_cluster="tacl")
266249
def apply_permissions_to_account_groups(cfg: WorkspaceConfig):
267250
"""Fourth phase of the workspace-local group migration process. It does the following:
268251
- Assigns the full set of permissions of the original group to the account-level one
@@ -273,13 +256,11 @@ def apply_permissions_to_account_groups(cfg: WorkspaceConfig):
273256
permissions, Secret Scopes, Notebooks, Directories, Repos, Files.
274257
275258
See [interactive tutorial here](https://app.getreprise.com/launch/myM3VNn/)."""
276-
ws = WorkspaceClient(config=cfg.to_databricks_config())
277259
backend = RuntimeBackend()
260+
ws = WorkspaceClient(config=cfg.to_databricks_config())
261+
group_manager = GroupManager(backend, ws, cfg.inventory_database, cfg.include_group_names, cfg.renamed_group_prefix)
278262

279-
remote_state = GroupMigrationState().fetch_migration_state(backend, cfg.inventory_database)
280-
migration_state = GroupManager.prepare_apply_permissions_to_account_groups(
281-
ws, remote_state, cfg.groups.backup_group_prefix
282-
)
263+
migration_state = group_manager.get_migration_state()
283264
if len(migration_state.groups) == 0:
284265
logger.info("Skipping group migration as no groups were found.")
285266
return
@@ -291,17 +272,18 @@ def apply_permissions_to_account_groups(cfg: WorkspaceConfig):
291272
num_threads=cfg.num_threads,
292273
workspace_start_path=cfg.workspace_start_path,
293274
)
294-
permission_manager.apply_group_permissions(migration_state, destination="account")
275+
permission_manager.apply_group_permissions(migration_state)
295276

296277

297-
@task("005-remove-workspace-local-backup-groups", depends_on=[apply_permissions_to_account_groups])
278+
@task("remove-workspace-local-backup-groups", depends_on=[apply_permissions_to_account_groups])
298279
def delete_backup_groups(cfg: WorkspaceConfig):
299280
"""Last step of the group migration process. Removes all workspace-level backup groups, along with their
300281
permissions. Execute this workflow only after you've confirmed that workspace-local migration worked
301282
successfully for all the groups involved."""
283+
backend = RuntimeBackend()
302284
ws = WorkspaceClient(config=cfg.to_databricks_config())
303-
group_manager = GroupManager(ws, cfg.groups)
304-
group_manager.delete_backup_groups()
285+
group_manager = GroupManager(backend, ws, cfg.inventory_database, cfg.include_group_names, cfg.renamed_group_prefix)
286+
group_manager.delete_original_workspace_groups()
305287

306288

307289
@task("099-destroy-schema")

src/databricks/labs/ucx/workspace_access/base.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
from collections.abc import Callable, Iterator
33
from dataclasses import dataclass
44
from logging import Logger
5-
from typing import Literal
65

7-
from databricks.labs.ucx.workspace_access.groups import GroupMigrationState
6+
from databricks.labs.ucx.workspace_access.groups import MigrationState
87

98
logger = Logger(__name__)
109

@@ -17,9 +16,6 @@ class Permissions:
1716
raw: str
1817

1918

20-
Destination = Literal["backup", "account"]
21-
22-
2319
class AclSupport:
2420
@abstractmethod
2521
def get_crawler_tasks(self) -> Iterator[Callable[..., Permissions | None]]:
@@ -29,9 +25,7 @@ def get_crawler_tasks(self) -> Iterator[Callable[..., Permissions | None]]:
2925
"""
3026

3127
@abstractmethod
32-
def get_apply_task(
33-
self, item: Permissions, migration_state: GroupMigrationState, destination: Destination
34-
) -> Callable[[], None] | None:
28+
def get_apply_task(self, item: Permissions, migration_state: MigrationState) -> Callable[[], None] | None:
3529
"""This method returns a Callable, that applies permissions to a destination group, based on
3630
the group migration state. The callable is required not to have any shared mutable state."""
3731

src/databricks/labs/ucx/workspace_access/generic.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,8 @@
1515

1616
from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
1717
from databricks.labs.ucx.mixins.hardening import rate_limited
18-
from databricks.labs.ucx.workspace_access.base import (
19-
AclSupport,
20-
Destination,
21-
Permissions,
22-
)
23-
from databricks.labs.ucx.workspace_access.groups import GroupMigrationState
18+
from databricks.labs.ucx.workspace_access.base import AclSupport, Permissions
19+
from databricks.labs.ucx.workspace_access.groups import MigrationState
2420

2521
logger = logging.getLogger(__name__)
2622

@@ -81,15 +77,15 @@ def object_types(self) -> set[str]:
8177
all_object_types.add(object_type)
8278
return all_object_types
8379

84-
def get_apply_task(self, item: Permissions, migration_state: GroupMigrationState, destination: Destination):
80+
def get_apply_task(self, item: Permissions, migration_state: MigrationState):
8581
if not self._is_item_relevant(item, migration_state):
8682
return None
8783
object_permissions = iam.ObjectPermissions.from_dict(json.loads(item.raw))
88-
new_acl = self._prepare_new_acl(object_permissions, migration_state, destination)
84+
new_acl = self._prepare_new_acl(object_permissions, migration_state)
8985
return partial(self._applier_task, item.object_type, item.object_id, new_acl)
9086

9187
@staticmethod
92-
def _is_item_relevant(item: Permissions, migration_state: GroupMigrationState) -> bool:
88+
def _is_item_relevant(item: Permissions, migration_state: MigrationState) -> bool:
9389
# passwords and tokens are represented on the workspace-level
9490
if item.object_id in ("tokens", "passwords"):
9591
return True
@@ -239,7 +235,7 @@ def _safe_update_permissions(
239235
raise RetryableError(message=msg) from e
240236

241237
def _prepare_new_acl(
242-
self, permissions: iam.ObjectPermissions, migration_state: GroupMigrationState, destination: Destination
238+
self, permissions: iam.ObjectPermissions, migration_state: MigrationState
243239
) -> list[iam.AccessControlRequest]:
244240
_acl = permissions.access_control_list
245241
acl_requests = []
@@ -248,7 +244,7 @@ def _prepare_new_acl(
248244
if not migration_state.is_in_scope(_item.group_name):
249245
logger.debug(f"Skipping {_item} for {coord} because it is not in scope")
250246
continue
251-
new_group_name = migration_state.get_target_principal(_item.group_name, destination)
247+
new_group_name = migration_state.get_target_principal(_item.group_name)
252248
if new_group_name is None:
253249
logger.debug(f"Skipping {_item.group_name} for {coord} because it has no target principal")
254250
continue

0 commit comments

Comments
 (0)