Skip to content

Commit e7543a8

Browse files
authored
Split migrate-groups workflow into three different stages for reliability (#442)
1 parent af58f0e commit e7543a8

File tree

4 files changed

+177
-147
lines changed

4 files changed

+177
-147
lines changed

src/databricks/labs/ucx/mixins/fixtures.py

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,27 +21,27 @@
2121

2222
from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend
2323

24-
_LOG = logging.getLogger(__name__)
24+
logger = logging.getLogger(__name__)
2525

2626

2727
def factory(name, create, remove):
2828
cleanup = []
2929

3030
def inner(**kwargs):
3131
x = create(**kwargs)
32-
_LOG.debug(f"added {name} fixture: {x}")
32+
logger.debug(f"added {name} fixture: {x}")
3333
cleanup.append(x)
3434
return x
3535

3636
yield inner
37-
_LOG.debug(f"clearing {len(cleanup)} {name} fixtures")
37+
logger.debug(f"clearing {len(cleanup)} {name} fixtures")
3838
for x in cleanup:
3939
try:
40-
_LOG.debug(f"removing {name} fixture: {x}")
40+
logger.debug(f"removing {name} fixture: {x}")
4141
remove(x)
4242
except DatabricksError as e:
4343
# TODO: fix on the databricks-labs-pytester level
44-
_LOG.debug(f"ignoring error while {name} {x} teardown: {e}")
44+
logger.debug(f"ignoring error while {name} {x} teardown: {e}")
4545

4646

4747
@pytest.fixture
@@ -122,7 +122,7 @@ def acc(product_info, debug_env) -> AccountClient:
122122
# Use variables from Unified Auth
123123
# See https://databricks-sdk-py.readthedocs.io/en/latest/authentication.html
124124
product_name, product_version = product_info
125-
_LOG.debug(f"Running with {len(debug_env)} env variables")
125+
logger.debug(f"Running with {len(debug_env)} env variables")
126126
return AccountClient(
127127
host=debug_env["DATABRICKS_HOST"],
128128
account_id=debug_env["DATABRICKS_ACCOUNT_ID"],
@@ -408,7 +408,7 @@ def _scim_values(ids: list[str]) -> list[iam.ComplexValue]:
408408
return [iam.ComplexValue(value=x) for x in ids]
409409

410410

411-
def _make_group(name, interface, make_random):
411+
def _make_group(name, cfg, interface, make_random):
412412
def create(
413413
*,
414414
members: list[str] | None = None,
@@ -425,19 +425,24 @@ def create(
425425
if entitlements is not None:
426426
kwargs["entitlements"] = _scim_values(entitlements)
427427
# TODO: REQUEST_LIMIT_EXCEEDED: GetUserPermissionsRequest RPC token bucket limit has been exceeded.
428-
return interface.create(**kwargs)
428+
group = interface.create(**kwargs)
429+
if cfg.is_account_client:
430+
logger.info(f"Account group {group.display_name}: {cfg.host}/users/groups/{group.id}/members")
431+
else:
432+
logger.info(f"Workspace group {group.display_name}: {cfg.host}#setting/accounts/groups/{group.id}")
433+
return group
429434

430435
yield from factory(name, create, lambda item: interface.delete(item.id))
431436

432437

433438
@pytest.fixture
434439
def make_group(ws, make_random):
435-
yield from _make_group("workspace group", ws.groups, make_random)
440+
yield from _make_group("workspace group", ws.config, ws.groups, make_random)
436441

437442

438443
@pytest.fixture
439444
def make_acc_group(acc, make_random):
440-
yield from _make_group("account group", acc.groups, make_random)
445+
yield from _make_group("account group", acc.config, acc.groups, make_random)
441446

442447

443448
@pytest.fixture
@@ -449,7 +454,11 @@ def create(*, name: str | None = None, **kwargs):
449454
kwargs["definition"] = json.dumps(
450455
{"spark_conf.spark.databricks.delta.preview.enabled": {"type": "fixed", "value": "true"}}
451456
)
452-
return ws.cluster_policies.create(name, **kwargs)
457+
cluster_policy = ws.cluster_policies.create(name, **kwargs)
458+
logger.info(
459+
f"Cluster policy: {ws.config.host}#setting/clusters/cluster-policies/view/{cluster_policy.policy_id}"
460+
)
461+
return cluster_policy
453462

454463
yield from factory("cluster policy", create, lambda item: ws.cluster_policies.delete(item.policy_id))
455464

@@ -565,7 +574,9 @@ def create(**kwargs):
565574
timeout_seconds=0,
566575
)
567576
]
568-
return ws.jobs.create(**kwargs)
577+
job = ws.jobs.create(**kwargs)
578+
logger.info(f"Job: {ws.config.host}#job/{job.job_id}")
579+
return job
569580

570581
yield from factory("job", create, lambda item: ws.jobs.delete(item.job_id))
571582

@@ -726,13 +737,18 @@ def create() -> CatalogInfo:
726737

727738

728739
@pytest.fixture
729-
def make_schema(sql_backend, make_random) -> Callable[..., SchemaInfo]:
740+
def make_schema(ws, sql_backend, make_random) -> Callable[..., SchemaInfo]:
730741
def create(*, catalog_name: str = "hive_metastore", name: str | None = None) -> SchemaInfo:
731742
if name is None:
732743
name = f"ucx_S{make_random(4)}"
733744
full_name = f"{catalog_name}.{name}".lower()
734745
sql_backend.execute(f"CREATE SCHEMA {full_name}")
735-
return SchemaInfo(catalog_name=catalog_name, name=name, full_name=full_name)
746+
schema_info = SchemaInfo(catalog_name=catalog_name, name=name, full_name=full_name)
747+
logger.info(
748+
f"Schema {schema_info.full_name}: "
749+
f"{ws.config.host}/explore/data/{schema_info.catalog_name}/{schema_info.name}"
750+
)
751+
return schema_info
736752

737753
yield from factory(
738754
"schema",
@@ -742,7 +758,7 @@ def create(*, catalog_name: str = "hive_metastore", name: str | None = None) ->
742758

743759

744760
@pytest.fixture
745-
def make_table(sql_backend, make_schema, make_random) -> Callable[..., TableInfo]:
761+
def make_table(ws, sql_backend, make_schema, make_random) -> Callable[..., TableInfo]:
746762
def create(
747763
*,
748764
catalog_name="hive_metastore",
@@ -781,7 +797,12 @@ def create(
781797
ddl = f"{ddl} TBLPROPERTIES ({tbl_properties})"
782798

783799
sql_backend.execute(ddl)
784-
return TableInfo(catalog_name=catalog_name, schema_name=schema_name, name=name, full_name=full_name)
800+
table_info = TableInfo(catalog_name=catalog_name, schema_name=schema_name, name=name, full_name=full_name)
801+
logger.info(
802+
f"Table {table_info.full_name}: "
803+
f"{ws.config.host}/explore/data/{table_info.catalog_name}/{table_info.schema_name}/{table_info.name}"
804+
)
805+
return table_info
785806

786807
def remove(table_info: TableInfo):
787808
try:

src/databricks/labs/ucx/runtime.py

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -203,13 +203,11 @@ def assessment_report(_: WorkspaceConfig):
203203
dashboard _before_ all tasks have been completed, but then only already completed information is shown."""
204204

205205

206-
@task("migrate-groups", depends_on=[crawl_permissions], job_cluster="tacl")
207-
def migrate_permissions(cfg: WorkspaceConfig):
208-
"""Main phase of the group migration process. It does the following:
206+
@task("002-apply-permissions-to-backup-groups", depends_on=[crawl_permissions], job_cluster="tacl")
207+
def apply_permissions_to_backup_groups(cfg: WorkspaceConfig):
208+
"""Second phase of the workspace-local group migration process. It does the following:
209209
- Creates a backup of every workspace-local group, adding a prefix that can be set in the configuration
210210
- Assigns the full set of permissions of the original group to the backup one
211-
- Creates an account-level group with the original name of the workspace-local one
212-
- Assigns the full set of permissions of the original group to the account-level one
213211
214212
It covers workspace-local permissions for all entities: Legacy Table ACLs, Entitlements,
215213
AWS instance profiles, Clusters, Cluster policies, Instance Pools, Databricks SQL warehouses, Delta Live
@@ -231,13 +229,55 @@ def migrate_permissions(cfg: WorkspaceConfig):
231229
num_threads=cfg.num_threads,
232230
workspace_start_path=cfg.workspace_start_path,
233231
)
234-
235232
permission_manager.apply_group_permissions(group_manager.migration_state, destination="backup")
233+
234+
235+
@task("003-replace-workspace-local-with-account-groups", depends_on=[apply_permissions_to_backup_groups])
236+
def replace_workspace_groups_with_account_groups(cfg: WorkspaceConfig):
237+
"""Third phase of the workspace-local group migration process. It does the following:
238+
- Creates an account-level group with the original name of the workspace-local one"""
239+
ws = WorkspaceClient(config=cfg.to_databricks_config())
240+
group_manager = GroupManager(ws, cfg.groups)
241+
group_manager.prepare_groups_in_environment()
242+
if not group_manager.has_groups():
243+
logger.info("Skipping group migration as no groups were found.")
244+
return
236245
group_manager.replace_workspace_groups_with_account_groups()
246+
247+
248+
@task(
249+
"004-apply-permissions-to-account-groups",
250+
depends_on=[replace_workspace_groups_with_account_groups],
251+
job_cluster="tacl",
252+
)
253+
def apply_permissions_to_account_groups(cfg: WorkspaceConfig):
254+
"""Fourth phase of the workspace-local group migration process. It does the following:
255+
- Assigns the full set of permissions of the original group to the account-level one
256+
257+
It covers workspace-local permissions for all entities: Legacy Table ACLs, Entitlements,
258+
AWS instance profiles, Clusters, Cluster policies, Instance Pools, Databricks SQL warehouses, Delta Live
259+
Tables, Jobs, MLflow experiments, MLflow registry, SQL Dashboards & Queries, SQL Alerts, Token and Password usage
260+
permissions, Secret Scopes, Notebooks, Directories, Repos, Files.
261+
262+
See [interactive tutorial here](https://app.getreprise.com/launch/myM3VNn/)."""
263+
ws = WorkspaceClient(config=cfg.to_databricks_config())
264+
group_manager = GroupManager(ws, cfg.groups)
265+
group_manager.prepare_groups_in_environment()
266+
if not group_manager.has_groups():
267+
logger.info("Skipping group migration as no groups were found.")
268+
return
269+
270+
permission_manager = PermissionManager.factory(
271+
ws,
272+
RuntimeBackend(),
273+
cfg.inventory_database,
274+
num_threads=cfg.num_threads,
275+
workspace_start_path=cfg.workspace_start_path,
276+
)
237277
permission_manager.apply_group_permissions(group_manager.migration_state, destination="account")
238278

239279

240-
@task("migrate-groups-cleanup", depends_on=[migrate_permissions])
280+
@task("005-remove-workspace-local-backup-groups", depends_on=[apply_permissions_to_account_groups])
241281
def delete_backup_groups(cfg: WorkspaceConfig):
242282
"""Last step of the group migration process. Removes all workspace-level backup groups, along with their
243283
permissions. Execute this workflow only after you've confirmed that workspace-local migration worked
@@ -247,7 +287,7 @@ def delete_backup_groups(cfg: WorkspaceConfig):
247287
group_manager.delete_backup_groups()
248288

249289

250-
@task("destroy-schema")
290+
@task("099-destroy-schema")
251291
def destroy_schema(cfg: WorkspaceConfig):
252292
"""This _clean-up_ workflow removes the `$inventory` database, with all the inventory tables created by
253293
the previous workflow runs. Use this to reset the entire state and start with the assessment step again."""

src/databricks/labs/ucx/workspace_access/groups.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import collections
12
import json
23
import logging
34
import typing
@@ -110,6 +111,28 @@ def delete_backup_groups(self):
110111
self._delete_workspace_group(group)
111112
logger.info("Backup groups were successfully deleted")
112113

114+
def get_workspace_membership(self, resource_type: str = "WorkspaceGroup"):
115+
membership = collections.defaultdict(set)
116+
for g in self._ws.groups.list(attributes=self._SCIM_ATTRIBUTES):
117+
if g.display_name in self._SYSTEM_GROUPS:
118+
continue
119+
if g.meta.resource_type != resource_type:
120+
continue
121+
if g.members is None:
122+
continue
123+
for m in g.members:
124+
membership[g.display_name].add(m.display)
125+
return membership
126+
127+
def get_account_membership(self):
128+
membership = collections.defaultdict(set)
129+
for g in self._account_groups:
130+
if g.members is None:
131+
continue
132+
for m in g.members:
133+
membership[g.display_name].add(m.display)
134+
return membership
135+
113136
def _list_workspace_groups(self) -> list[iam.Group]:
114137
logger.info("Listing workspace groups...")
115138
workspace_groups = [
@@ -159,7 +182,7 @@ def _get_or_create_backup_group(self, source_group_name: str, source_group: iam.
159182
entitlements=source_group.entitlements,
160183
roles=source_group.roles,
161184
members=source_group.members,
162-
)
185+
) # TODO: there still could be a corner case, where we get `Group with name db-temp-XXX already exists.`
163186
self._workspace_groups.append(backup_group)
164187
logger.info(f"Backup group {backup_group_name} successfully created")
165188

0 commit comments

Comments
 (0)