Skip to content

Commit 7917906

Browse files
authored
Add a check for existing inventory database to avoid losing existing, inject installation objects in tests and try fetching existing installation before setting global as default (#1043)
1 parent c945347 commit 7917906

File tree

3 files changed

+112
-34
lines changed

3 files changed

+112
-34
lines changed

src/databricks/labs/ucx/install.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
import databricks.sdk.errors
1515
from databricks.labs.blueprint.entrypoint import get_logger
16-
from databricks.labs.blueprint.installation import Installation
16+
from databricks.labs.blueprint.installation import Installation, SerdeError
1717
from databricks.labs.blueprint.installer import InstallState
1818
from databricks.labs.blueprint.parallel import ManyError, Threads
1919
from databricks.labs.blueprint.tui import Prompts
@@ -265,6 +265,8 @@ def _configure_new_installation(self) -> WorkspaceConfig:
265265
inventory_database = self._prompts.question(
266266
"Inventory Database stored in hive_metastore", default="ucx", valid_regex=r"^\w+$"
267267
)
268+
269+
self._check_inventory_database_exists(inventory_database)
268270
warehouse_id = self._configure_warehouse()
269271
configure_groups = ConfigureGroups(self._prompts)
270272
configure_groups.run()
@@ -331,6 +333,20 @@ def warehouse_type(_):
331333
warehouse_id = new_warehouse.id
332334
return warehouse_id
333335

336+
def _check_inventory_database_exists(self, inventory_database: str):
337+
logger.info("Fetching installations...")
338+
for installation in Installation.existing(self._ws, self._product_info.product_name()):
339+
try:
340+
config = installation.load(WorkspaceConfig)
341+
if config.inventory_database == inventory_database:
342+
raise AlreadyExists(
343+
f"Inventory database '{inventory_database}' already exists in another installation"
344+
)
345+
except NotFound:
346+
continue
347+
except SerdeError:
348+
continue
349+
334350

335351
class WorkspaceInstallation:
336352
def __init__(
@@ -991,6 +1007,9 @@ def validate_and_run(self, step: str):
9911007
logger.setLevel("INFO")
9921008
app = ProductInfo.from_class(WorkspaceConfig)
9931009
workspace_client = WorkspaceClient(product="ucx", product_version=__version__)
994-
current = Installation.assume_global(workspace_client, app.product_name())
1010+
try:
1011+
current = app.current_installation(workspace_client)
1012+
except NotFound:
1013+
current = Installation.assume_global(workspace_client, app.product_name())
9951014
installer = WorkspaceInstaller(Prompts(), current, workspace_client, app)
9961015
installer.run()

tests/integration/test_installation.py

Lines changed: 44 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from databricks.labs.blueprint.parallel import Threads
1414
from databricks.labs.blueprint.tui import MockPrompts
1515
from databricks.labs.blueprint.wheels import ProductInfo
16-
from databricks.sdk.errors import InvalidParameterValue, NotFound
16+
from databricks.sdk.errors import AlreadyExists, InvalidParameterValue, NotFound
1717
from databricks.sdk.retries import retried
1818
from databricks.sdk.service import compute, sql
1919
from databricks.sdk.service.iam import PermissionLevel
@@ -38,11 +38,11 @@ def new_installation(ws, sql_backend, env_or_skip, inventory_schema):
3838

3939
def factory(
4040
config_transform: Callable[[WorkspaceConfig], WorkspaceConfig] | None = None,
41+
installation: Installation | None = None,
4142
product_info: ProductInfo | None = None,
4243
environ: dict[str, str] | None = None,
4344
extend_prompts: dict[str, str] | None = None,
44-
single_user_install: bool = False,
45-
fresh_install: bool = True,
45+
inventory_schema_suffix: str = "",
4646
):
4747
if not product_info:
4848
product_info = ProductInfo.for_testing(WorkspaceConfig)
@@ -58,7 +58,7 @@ def factory(
5858
r".*PRO or SERVERLESS SQL warehouse.*": "1",
5959
r"Choose how to map the workspace groups.*": "1",
6060
r".*connect to the external metastore?.*": "yes",
61-
r".*Inventory Database.*": inventory_schema,
61+
r".*Inventory Database.*": f"{inventory_schema}{inventory_schema_suffix}",
6262
r".*Backup prefix*": renamed_group_prefix,
6363
r".*": "",
6464
}
@@ -75,19 +75,8 @@ def factory(
7575
],
7676
)
7777

78-
if not fresh_install:
79-
installation = product_info.current_installation(ws)
80-
elif single_user_install:
81-
installation = Installation(
82-
ws,
83-
product_info.product_name(),
84-
install_folder=f"/Users/{ws.current_user.me().user_name}/.{product_info.product_name()}",
85-
)
86-
else:
87-
installation = Installation(
88-
ws, product_info.product_name(), install_folder=f"/Applications/{product_info.product_name()}"
89-
)
90-
78+
if not installation:
79+
installation = Installation(ws, product_info.product_name())
9180
installer = WorkspaceInstaller(prompts, installation, ws, product_info, environ)
9281
workspace_config = installer.configure()
9382
installation = product_info.current_installation(ws)
@@ -350,31 +339,36 @@ def test_uninstallation(ws, sql_backend, new_installation):
350339
sql_backend.execute(f"show tables from hive_metastore.{install.config.inventory_database}")
351340

352341

353-
def test_fresh_global_installation(new_installation):
342+
def test_fresh_global_installation(ws, new_installation):
354343
product_info = ProductInfo.for_testing(WorkspaceConfig)
355344
global_installation = new_installation(
356345
product_info=product_info,
346+
installation=Installation.assume_global(ws, product_info.product_name()),
357347
)
358348
assert global_installation.folder == f"/Applications/{product_info.product_name()}"
359349
global_installation.uninstall()
360350

361351

362352
def test_fresh_user_installation(ws, new_installation):
363353
product_info = ProductInfo.for_testing(WorkspaceConfig)
364-
user_installation = new_installation(product_info=product_info, single_user_install=True)
354+
user_installation = new_installation(
355+
product_info=product_info,
356+
installation=Installation.assume_user_home(ws, product_info.product_name()),
357+
)
365358
assert user_installation.folder == f"/Users/{ws.current_user.me().user_name}/.{product_info.product_name()}"
366359
user_installation.uninstall()
367360

368361

369-
def test_global_installation_on_existing_global_install(new_installation):
362+
def test_global_installation_on_existing_global_install(ws, new_installation):
370363
product_info = ProductInfo.for_testing(WorkspaceConfig)
371364
existing_global_installation = new_installation(
372365
product_info=product_info,
366+
installation=Installation.assume_global(ws, product_info.product_name()),
373367
)
374368
assert existing_global_installation.folder == f"/Applications/{product_info.product_name()}"
375369
reinstall_global = new_installation(
376370
product_info=product_info,
377-
fresh_install=False,
371+
installation=Installation.assume_global(ws, product_info.product_name()),
378372
)
379373
assert reinstall_global.folder == f"/Applications/{product_info.product_name()}"
380374
reinstall_global.uninstall()
@@ -385,13 +379,14 @@ def test_user_installation_on_existing_global_install(ws, new_installation):
385379
product_info = ProductInfo.for_testing(WorkspaceConfig)
386380
existing_global_installation = new_installation(
387381
product_info=product_info,
382+
installation=Installation.assume_global(ws, product_info.product_name()),
388383
)
389384

390385
# warning to be thrown by installer if override environment variable present but no confirmation
391386
with pytest.raises(RuntimeWarning) as err:
392387
new_installation(
393388
product_info=product_info,
394-
fresh_install=False,
389+
installation=Installation.assume_global(ws, product_info.product_name()),
395390
environ={'UCX_FORCE_INSTALL': 'user'},
396391
extend_prompts={
397392
r".*UCX is already installed on this workspace.*": 'no',
@@ -402,11 +397,12 @@ def test_user_installation_on_existing_global_install(ws, new_installation):
402397
# successful override with confirmation
403398
reinstall_user_force = new_installation(
404399
product_info=product_info,
405-
fresh_install=False,
400+
installation=Installation.assume_global(ws, product_info.product_name()),
406401
environ={'UCX_FORCE_INSTALL': 'user'},
407402
extend_prompts={
408403
r".*UCX is already installed on this workspace.*": 'yes',
409404
},
405+
inventory_schema_suffix="_reinstall",
410406
)
411407
assert reinstall_user_force.folder == f"/Users/{ws.current_user.me().user_name}/.{product_info.product_name()}"
412408
reinstall_user_force.uninstall()
@@ -416,7 +412,9 @@ def test_user_installation_on_existing_global_install(ws, new_installation):
416412
def test_global_installation_on_existing_user_install(ws, new_installation):
417413
# existing installation at user level
418414
product_info = ProductInfo.for_testing(WorkspaceConfig)
419-
existing_user_installation = new_installation(product_info=product_info, single_user_install=True)
415+
existing_user_installation = new_installation(
416+
product_info=product_info, installation=Installation.assume_user_home(ws, product_info.product_name())
417+
)
420418
assert (
421419
existing_user_installation.folder == f"/Users/{ws.current_user.me().user_name}/.{product_info.product_name()}"
422420
)
@@ -425,7 +423,7 @@ def test_global_installation_on_existing_user_install(ws, new_installation):
425423
with pytest.raises(RuntimeWarning) as err:
426424
new_installation(
427425
product_info=product_info,
428-
fresh_install=False,
426+
installation=Installation.assume_user_home(ws, product_info.product_name()),
429427
environ={'UCX_FORCE_INSTALL': 'global'},
430428
extend_prompts={
431429
r".*UCX is already installed on this workspace.*": 'no',
@@ -434,15 +432,34 @@ def test_global_installation_on_existing_user_install(ws, new_installation):
434432
assert err.value.args[0] == "UCX is already installed, but no confirmation"
435433

436434
# not implemented error with confirmation
437-
438435
with pytest.raises(databricks.sdk.errors.NotImplemented) as err:
439436
new_installation(
440437
product_info=product_info,
441-
fresh_install=False,
438+
installation=Installation.assume_user_home(ws, product_info.product_name()),
442439
environ={'UCX_FORCE_INSTALL': 'global'},
443440
extend_prompts={
444441
r".*UCX is already installed on this workspace.*": 'yes',
445442
},
446443
)
447444
assert err.value.args[0] == "Migration needed. Not implemented yet."
448445
existing_user_installation.uninstall()
446+
447+
448+
def test_check_inventory_database_exists(ws, new_installation):
449+
product_info = ProductInfo.for_testing(WorkspaceConfig)
450+
install = new_installation(
451+
product_info=product_info,
452+
installation=Installation.assume_global(ws, product_info.product_name()),
453+
)
454+
inventory_database = install.config.inventory_database
455+
456+
with pytest.raises(AlreadyExists) as err:
457+
new_installation(
458+
product_info=product_info,
459+
installation=Installation.assume_global(ws, product_info.product_name()),
460+
environ={'UCX_FORCE_INSTALL': 'user'},
461+
extend_prompts={
462+
r".*UCX is already installed on this workspace.*": 'yes',
463+
},
464+
)
465+
assert err.value.args[0] == f"Inventory database '{inventory_database}' already exists in another installation"

tests/unit/test_install.py

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
import io
12
import json
23
from datetime import datetime, timedelta
34
from unittest.mock import MagicMock, create_autospec, patch
45

56
import pytest
7+
import yaml
68
from databricks.labs.blueprint.installation import Installation, MockInstallation
79
from databricks.labs.blueprint.installer import InstallState
810
from databricks.labs.blueprint.parallel import ManyError
@@ -16,6 +18,7 @@
1618
from databricks.labs.lsql.backends import MockBackend
1719
from databricks.sdk import WorkspaceClient
1820
from databricks.sdk.errors import ( # pylint: disable=redefined-builtin
21+
AlreadyExists,
1922
InvalidParameterValue,
2023
NotFound,
2124
NotImplemented,
@@ -84,6 +87,26 @@ def mock_clusters():
8487

8588
@pytest.fixture
8689
def ws():
90+
state = {
91+
"/Applications/ucx/config.yml": yaml.dump(
92+
{
93+
'version': 1,
94+
'inventory_database': 'ucx_exists',
95+
'connect': {
96+
'host': '...',
97+
'token': '...',
98+
},
99+
}
100+
),
101+
}
102+
103+
def download(path: str) -> io.StringIO | io.BytesIO:
104+
if path not in state:
105+
raise NotFound(path)
106+
if ".csv" in path:
107+
return io.BytesIO(state[path].encode('utf-8'))
108+
return io.StringIO(state[path])
109+
87110
workspace_client = create_autospec(WorkspaceClient)
88111

89112
workspace_client.current_user.me = lambda: iam.User(
@@ -107,6 +130,7 @@ def ws():
107130
workspace_client.cluster_policies.create.return_value = CreatePolicyResponse(policy_id="foo")
108131
workspace_client.clusters.select_spark_version = lambda latest: "14.2.x-scala2.12"
109132
workspace_client.clusters.select_node_type = lambda local_disk: "Standard_F4s"
133+
workspace_client.workspace.download = download
110134

111135
return workspace_client
112136

@@ -503,6 +527,7 @@ def test_create_cluster_policy(ws, mock_installation):
503527
r".*": "",
504528
}
505529
)
530+
ws.workspace.get_status = not_found
506531
install = WorkspaceInstaller(prompts, mock_installation, ws, PRODUCT_INFO)
507532
install.configure()
508533
mock_installation.assert_file_written(
@@ -763,7 +788,6 @@ def test_repair_run(ws, mocker, any_prompt, mock_installation_with_jobs):
763788
state=RunState(result_state=RunResultState.FAILED),
764789
)
765790
]
766-
mocker.patch("webbrowser.open")
767791
ws.jobs.list_runs.return_value = base
768792
ws.jobs.list_runs.repair_run = None
769793

@@ -1087,7 +1111,7 @@ def test_save_config_should_include_databases(ws, mock_installation):
10871111
r".*": "",
10881112
}
10891113
)
1090-
1114+
ws.workspace.get_status = not_found
10911115
install = WorkspaceInstaller(prompts, mock_installation, ws, PRODUCT_INFO)
10921116
install.configure()
10931117

@@ -1190,7 +1214,6 @@ def test_fresh_install(ws, mock_installation):
11901214

11911215

11921216
def test_get_existing_installation_global(ws, mock_installation, mocker):
1193-
mocker.patch("webbrowser.open")
11941217
prompts = MockPrompts(
11951218
{
11961219
r".*PRO or SERVERLESS SQL warehouse.*": "1",
@@ -1221,9 +1244,8 @@ def test_get_existing_installation_global(ws, mock_installation, mocker):
12211244

12221245
# test for force user install variable without prompts
12231246
second_install = WorkspaceInstaller(prompts, installation, ws, PRODUCT_INFO, force_user_environ)
1224-
with pytest.raises(RuntimeWarning) as err:
1247+
with pytest.raises(RuntimeWarning, match='UCX is already installed, but no confirmation'):
12251248
second_install.configure()
1226-
assert err.value.args[0] == 'UCX is already installed, but no confirmation'
12271249

12281250
# test for force user install variable with prompts
12291251
prompts = MockPrompts(
@@ -1318,3 +1340,23 @@ def test_databricks_runtime_version_set(ws, mock_installation):
13181340

13191341
with pytest.raises(SystemExit, match="WorkspaceInstaller is not supposed to be executed in Databricks Runtime"):
13201342
WorkspaceInstaller(prompts, mock_installation, ws, product_info, environ)
1343+
1344+
1345+
def test_check_inventory_database_exists(ws, mock_installation):
1346+
ws.current_user.me().user_name = "foo"
1347+
1348+
prompts = MockPrompts(
1349+
{
1350+
r".*Inventory Database stored in hive_metastore": "ucx_exists",
1351+
r".*": "",
1352+
}
1353+
)
1354+
1355+
installation_type_mock = create_autospec(Installation)
1356+
installation_type_mock.load.side_effect = NotFound
1357+
1358+
installation = Installation(ws, 'ucx')
1359+
install = WorkspaceInstaller(prompts, installation, ws, PRODUCT_INFO)
1360+
1361+
with pytest.raises(AlreadyExists, match="Inventory database 'ucx_exists' already exists in another installation"):
1362+
install.configure()

0 commit comments

Comments
 (0)