Skip to content

Commit 2168f20

Browse files
authored
Decouple group configuration from install.py (#714)
This PR reduces the size of `install.py` file
1 parent 06063fb commit 2168f20

File tree

5 files changed

+131
-101
lines changed

5 files changed

+131
-101
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ python="3.10"
6161
path = ".venv"
6262

6363
[tool.hatch.envs.default.scripts]
64-
test = "pytest -n auto --cov src --cov-report=xml --timeout 10 tests/unit"
65-
coverage = "pytest -n auto --cov src tests/unit --timeout 10 --cov-report=html"
64+
test = "pytest -n auto --cov src --cov-report=xml --timeout 30 tests/unit"
65+
coverage = "pytest -n auto --cov src tests/unit --timeout 30 --cov-report=html"
6666
integration = "pytest -n 10 --cov src tests/integration"
6767
fmt = ["isort .",
6868
"black .",

src/databricks/labs/ucx/install.py

Lines changed: 9 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
import functools
21
import json
32
import logging
43
import os
5-
import re
64
import sys
75
import time
86
import webbrowser
@@ -47,7 +45,7 @@
4745
from databricks.labs.ucx.runtime import main
4846
from databricks.labs.ucx.workspace_access.base import Permissions
4947
from databricks.labs.ucx.workspace_access.generic import WorkspaceObjectInfo
50-
from databricks.labs.ucx.workspace_access.groups import MigratedGroup
48+
from databricks.labs.ucx.workspace_access.groups import ConfigureGroups, MigratedGroup
5149

5250
TAG_STEP = "step"
5351
TAG_APP = "App"
@@ -380,77 +378,10 @@ def warehouse_type(_):
380378
)
381379
warehouse_id = new_warehouse.id
382380

383-
# Setting up group migration parameters
384-
groups_config_args = {}
385-
386-
ask_for_group = functools.partial(self._prompts.question, validate=self._is_valid_group_str)
387-
ask_for_regex = functools.partial(self._prompts.question, validate=self._validate_regex)
388-
while self._prompts.confirm(
389-
"Do you need to convert the workspace groups to match the account groups' name?"
390-
" If the workspace groups' names match the account groups' names select no"
391-
" or hit <Enter/Return>."
392-
):
393-
logger.info("Setting up group name translation")
394-
groups_config_args["convert_group_names"] = "yes"
395-
choices = {
396-
"Apply a Prefix": "prefix",
397-
"Apply a Suffix": "suffix",
398-
"Regex Substitution": "sub",
399-
"Regex Matching": "match",
400-
"Match By External ID": "external",
401-
"Cancel": "cancel",
402-
}
403-
choice = self._prompts.choice_from_dict("Choose how to map the workspace groups:", choices, sort=False)
404-
match choice:
405-
case "cancel":
406-
continue
407-
case "prefix":
408-
prefix = ask_for_group("Enter a prefix to add to the workspace group name")
409-
if not prefix:
410-
continue
411-
groups_config_args["workspace_group_match_regex"] = "^"
412-
groups_config_args["workspace_group_replace"] = prefix
413-
case "suffix":
414-
suffix = ask_for_group("Enter a suffix to add to the workspace group name")
415-
if not suffix:
416-
continue
417-
groups_config_args["workspace_group_match_regex"] = "$"
418-
groups_config_args["workspace_group_replace"] = suffix
419-
case "sub":
420-
match_value = ask_for_regex("Enter a regular expression for substitution")
421-
if not match_value:
422-
continue
423-
sub_value = ask_for_group("Enter the substitution value")
424-
if not sub_value:
425-
continue
426-
groups_config_args["workspace_group_match_regex"] = match_value
427-
groups_config_args["workspace_group_replace"] = sub_value
428-
case "matching":
429-
ws_match_value = ask_for_regex("Enter a regular expression to match on the workspace group")
430-
if not ws_match_value:
431-
continue
432-
acct_match_value = ask_for_regex("Enter a regular expression to match on the account group")
433-
if not acct_match_value:
434-
continue
435-
groups_config_args["workspace_group_match_regex"] = ws_match_value
436-
groups_config_args["account_group_match_regex"] = acct_match_value
437-
case "external":
438-
groups_config_args["group_match_by_external_id"] = True
439-
break
440-
441-
selected_groups = self._prompts.question(
442-
"Comma-separated list of workspace group names to migrate. If not specified, we'll use all "
443-
"account-level groups with matching names to workspace-level groups",
444-
default="<ALL>",
445-
)
446-
backup_group_prefix = ask_for_group("Backup prefix", default="db-temp-")
381+
configure_groups = ConfigureGroups(self._prompts)
382+
configure_groups.run()
447383
log_level = self._prompts.question("Log level", default="INFO").upper()
448384
num_threads = int(self._prompts.question("Number of threads", default="8", valid_number=True))
449-
groups_config_args["backup_group_prefix"] = backup_group_prefix
450-
if selected_groups != "<ALL>":
451-
groups_config_args["selected"] = [x.strip() for x in selected_groups.split(",")]
452-
else:
453-
groups_config_args["selected"] = None
454385

455386
# Checking for external HMS
456387
instance_profile = None
@@ -474,12 +405,12 @@ def warehouse_type(_):
474405

475406
self._config = WorkspaceConfig(
476407
inventory_database=inventory_database,
477-
workspace_group_regex=groups_config_args.get("workspace_group_regex"),
478-
workspace_group_replace=groups_config_args.get("workspace_group_replace"),
479-
account_group_regex=groups_config_args.get("account_group_regex"),
480-
group_match_by_external_id=groups_config_args.get("group_match_by_external_id"),
481-
include_group_names=groups_config_args["selected"],
482-
renamed_group_prefix=groups_config_args["backup_group_prefix"],
408+
workspace_group_regex=configure_groups.workspace_group_regex,
409+
workspace_group_replace=configure_groups.workspace_group_replace,
410+
account_group_regex=configure_groups.account_group_regex,
411+
group_match_by_external_id=configure_groups.group_match_by_external_id,
412+
include_group_names=configure_groups.include_group_names,
413+
renamed_group_prefix=configure_groups.renamed_group_prefix,
483414
warehouse_id=warehouse_id,
484415
log_level=log_level,
485416
num_threads=num_threads,
@@ -643,15 +574,6 @@ def _create_debug(self, remote_wheel: str):
643574
def notebook_link(self, path: str) -> str:
644575
return f"{self._ws.config.host}/#workspace{path}"
645576

646-
@staticmethod
647-
def _validate_regex(regex_input: str) -> bool:
648-
try:
649-
re.compile(regex_input)
650-
return True
651-
except re.error:
652-
logger.error(f"{regex_input} is an invalid regular expression")
653-
return False
654-
655577
def _job_settings(self, step_name: str, remote_wheel: str):
656578
email_notifications = None
657579
if not self._current_config.override_clusters and "@" in self._my_username:
@@ -858,10 +780,6 @@ def _get_ext_hms_conf_from_policy(cluster_policy):
858780
spark_conf_dict[key[11:]] = cluster_policy[key]["value"]
859781
return instance_profile, spark_conf_dict
860782

861-
@staticmethod
862-
def _is_valid_group_str(group_str: str):
863-
return group_str and not re.search(r"[\s#,+ \\<>;]", group_str)
864-
865783
def latest_job_status(self) -> list[dict]:
866784
latest_status = []
867785
for step, job_id in self._state.jobs.items():

src/databricks/labs/ucx/workspace_access/groups.py

Lines changed: 95 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
2424
from databricks.labs.ucx.framework.parallel import ManyError, Threads
25+
from databricks.labs.ucx.framework.tui import Prompts
2526
from databricks.labs.ucx.mixins.hardening import rate_limited
2627

2728
logger = logging.getLogger(__name__)
@@ -73,13 +74,6 @@ def is_in_scope(self, name: str) -> bool:
7374
else:
7475
return name in self._name_to_group
7576

76-
def get_target_id(self, group_id: str) -> str | None:
77-
group = self._id_to_group.get(group_id)
78-
if group:
79-
return group.external_id
80-
else:
81-
return None
82-
8377
def __len__(self):
8478
return len(self._name_to_group)
8579

@@ -539,3 +533,97 @@ def _get_strategy(
539533
renamed_groups_prefix=self._renamed_group_prefix,
540534
include_group_names=self._include_group_names,
541535
)
536+
537+
538+
class ConfigureGroups:
539+
renamed_group_prefix = "db-temp-"
540+
workspace_group_regex = None
541+
workspace_group_replace = None
542+
account_group_regex = None
543+
group_match_by_external_id = None
544+
include_group_names = None
545+
546+
def __init__(self, prompts: Prompts):
547+
self._prompts = prompts
548+
self._ask_for_group = functools.partial(self._prompts.question, validate=self._is_valid_group_str)
549+
self._ask_for_regex = functools.partial(self._prompts.question, validate=self._validate_regex)
550+
551+
def run(self):
552+
self.renamed_group_prefix = self._ask_for_group("Backup prefix", default=self.renamed_group_prefix)
553+
strategy = self._prompts.choice_from_dict(
554+
"Choose how to map the workspace groups:",
555+
{
556+
"Apply a Prefix": self._configure_prefix,
557+
"Apply a Suffix": self._configure_suffix,
558+
"Comma-separated list of workspace group names to migrate": self._configure_names,
559+
"Match By External ID": self._configure_external,
560+
"Regex Substitution": self._configure_substitution,
561+
"Regex Matching": self._configure_matching,
562+
},
563+
)
564+
strategy()
565+
566+
def _configure_prefix(self):
567+
prefix = self._ask_for_group("Enter a prefix to add to the workspace group name")
568+
if not prefix:
569+
return False
570+
self.workspace_group_regex = "^"
571+
self.workspace_group_replace = prefix
572+
return True
573+
574+
def _configure_suffix(self):
575+
suffix = self._ask_for_group("Enter a suffix to add to the workspace group name")
576+
if not suffix:
577+
return False
578+
self.workspace_group_regex = "$"
579+
self.workspace_group_replace = suffix
580+
return True
581+
582+
def _configure_substitution(self):
583+
match_value = self._ask_for_regex("Enter a regular expression for substitution")
584+
if not match_value:
585+
return False
586+
sub_value = self._ask_for_group("Enter the substitution value")
587+
if not sub_value:
588+
return False
589+
self.workspace_group_regex = match_value
590+
self.workspace_group_replace = sub_value
591+
return True
592+
593+
def _configure_matching(self):
594+
ws_match_value = self._ask_for_regex("Enter a regular expression to match on the workspace group")
595+
if not ws_match_value:
596+
return False
597+
acct_match_value = self._ask_for_regex("Enter a regular expression to match on the account group")
598+
if not acct_match_value:
599+
return False
600+
self.workspace_group_regex = ws_match_value
601+
self.account_group_regex = acct_match_value
602+
return True
603+
604+
def _configure_names(self):
605+
selected_groups = self._prompts.question(
606+
"Comma-separated list of workspace group names to migrate. If not specified, we'll use all "
607+
"account-level groups with matching names to workspace-level groups",
608+
default="<ALL>",
609+
)
610+
if selected_groups != "<ALL>":
611+
self.include_group_names = [x.strip() for x in selected_groups.split(",")]
612+
return True
613+
614+
def _configure_external(self):
615+
self.group_match_by_external_id = True
616+
return True
617+
618+
@staticmethod
619+
def _is_valid_group_str(group_str: str):
620+
return group_str and not re.search(r"[\s#,+ \\<>;]", group_str)
621+
622+
@staticmethod
623+
def _validate_regex(regex_input: str) -> bool:
624+
try:
625+
re.compile(regex_input)
626+
return True
627+
except re.error:
628+
logger.error(f"{regex_input} is an invalid regular expression")
629+
return False

tests/unit/test_install.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ def not_found(_):
276276
promtps=MockPrompts(
277277
{
278278
r".*PRO or SERVERLESS SQL warehouse.*": "1",
279+
r"Choose how to map the workspace groups.*": "2",
279280
r".*": "",
280281
}
281282
),
@@ -411,6 +412,7 @@ def not_found(_):
411412
promtps=MockPrompts(
412413
{
413414
r".*PRO or SERVERLESS SQL warehouse.*": "1",
415+
r"Choose how to map the workspace groups.*": "2",
414416
r".*": "",
415417
}
416418
),
@@ -445,6 +447,7 @@ def test_save_config_strip_group_names(ws):
445447
promtps=MockPrompts(
446448
{
447449
r".*PRO or SERVERLESS SQL warehouse.*": "1",
450+
r"Choose how to map the workspace groups.*": "2", # specify names
448451
r".*workspace group names.*": "g1, g2, g99",
449452
r".*": "",
450453
}
@@ -506,6 +509,7 @@ def test_save_config_with_custom_policy(ws):
506509
{
507510
r".*PRO or SERVERLESS SQL warehouse.*": "1",
508511
r".*follow a policy.*": "yes",
512+
r"Choose how to map the workspace groups.*": "2",
509513
r".*Choose a cluster policy.*": "0",
510514
r".*": "",
511515
}
@@ -561,6 +565,7 @@ def test_save_config_with_glue(ws):
561565
promtps=MockPrompts(
562566
{
563567
r".*PRO or SERVERLESS SQL warehouse.*": "1",
568+
r"Choose how to map the workspace groups.*": "2",
564569
r".*connect to the external metastore?.*": "yes",
565570
r".*Choose a cluster policy.*": "0",
566571
r".*": "",

tests/unit/workspace_access/test_groups.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,12 @@
77
from databricks.sdk.service.iam import ComplexValue, Group, ResourceMeta
88

99
from databricks.labs.ucx.framework.parallel import ManyError
10-
from databricks.labs.ucx.workspace_access.groups import GroupManager, MigratedGroup
10+
from databricks.labs.ucx.framework.tui import MockPrompts
11+
from databricks.labs.ucx.workspace_access.groups import (
12+
ConfigureGroups,
13+
GroupManager,
14+
MigratedGroup,
15+
)
1116
from tests.unit.framework.mocks import MockBackend
1217

1318

@@ -679,3 +684,17 @@ def test_snapshot_with_group_matched_by_external_id():
679684
entitlements='[{"value": "allow-cluster-create"}, {"value": "allow-instance-pool-create"}]',
680685
)
681686
]
687+
688+
689+
def test_configure_groups():
690+
cg = ConfigureGroups(
691+
MockPrompts(
692+
{
693+
"Backup prefix": "",
694+
r"Choose how to map the workspace groups.*": "2", # specify names
695+
r"^Comma-separated list of workspace group names to migrate.*": "foo, bar, baz",
696+
}
697+
)
698+
)
699+
cg.run()
700+
assert ["foo", "bar", "baz"] == cg.include_group_names

0 commit comments

Comments
 (0)