
Commit 104e1b7

Fixing installer not being able to migrate from V1 to V2 (#596)
- [x] Needs to verify if "backup_group_prefix" is properly migrated
- [x] Needs to verify if "selected_groups" is properly migrated
- [x] Needs to verify if "auto:True" is properly migrated
- [ ] Needs to verify if previous config and jobs are overridden by V2 config
- [x] Needs to work on a fresh installation
- [x] Needs to work when being re-run
1 parent 80e2217 commit 104e1b7
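
For orientation, the migration boils down to a small key mapping between the v1 and v2 config schemas, which is what the checklist above verifies. The sketch below is illustrative only; the function name migrate_v1_config is hypothetical, and the real logic is WorkspaceConfig._migrate_from_v1 in config.py, patched in this commit:

# Illustrative sketch (assumed helper name) of the v1 -> v2 key mapping.
# The real implementation is WorkspaceConfig._migrate_from_v1, shown in the diff below.
def migrate_v1_config(raw: dict) -> dict:
    groups = raw.get("groups", {})
    # v1 "groups.selected" becomes the top-level v2 "include_group_names";
    # a v1 config that only had "auto: true" carries no selection over.
    raw["include_group_names"] = groups["selected"] if "selected" in groups else None
    # v1 "groups.backup_group_prefix" becomes the v2 "renamed_group_prefix".
    raw["renamed_group_prefix"] = groups.get("backup_group_prefix", "db-temp-")
    # drop the whole v1 "groups" block so it does not leak into the v2 file
    raw.pop("groups", None)
    return raw

On the migration path the installer then rewrites config.yml with overwrite=True (see install.py below), which is the mechanism behind the "previous config ... overridden by V2 config" checklist item.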

3 files changed (+139, -19 lines)


src/databricks/labs/ucx/config.py

Lines changed: 4 additions & 1 deletion
@@ -206,8 +206,11 @@ def _migrate_from_v1(cls, raw: dict):
         if stored_version == _CONFIG_VERSION:
             return raw
         if stored_version == 1:
-            raw["include_group_names"] = raw.get("groups", {"selected": []})["selected"]
+            raw["include_group_names"] = (
+                raw.get("groups", {"selected": []})["selected"] if "selected" in raw["groups"] else None
+            )
             raw["renamed_group_prefix"] = raw.get("groups", {"backup_group_prefix": "db-temp-"})["backup_group_prefix"]
+            raw.pop("groups", None)
             return raw
         msg = f"Unknown config version: {stored_version}"
         raise ValueError(msg)

src/databricks/labs/ucx/install.py

Lines changed: 19 additions & 10 deletions
@@ -16,7 +16,7 @@
 import yaml
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.core import DatabricksError
-from databricks.sdk.errors import OperationFailed
+from databricks.sdk.errors import NotFound, OperationFailed
 from databricks.sdk.mixins.compute import SemVer
 from databricks.sdk.service import compute, jobs
 from databricks.sdk.service.sql import EndpointInfoWarehouseType, SpotInstancePolicy
@@ -146,6 +146,7 @@ def __init__(

     def run(self):
         logger.info(f"Installing UCX v{self._version}")
+
         self._configure()
         self._run_configured()

@@ -215,7 +216,7 @@ def run_for_config(
         workspace_installer = WorkspaceInstaller(ws, prefix=prefix, promtps=False, sql_backend=sql_backend)
         logger.info(f"Installing UCX v{workspace_installer._version} on {ws.config.host}")
         workspace_installer._config = config
-        workspace_installer._write_config()
+        workspace_installer._write_config(overwrite=False)
         workspace_installer._override_clusters = override_clusters
         # TODO: rather introduce a method `is_configured`, as we may want to reconfigure workspaces for some reason
         workspace_installer._run_configured()
@@ -316,6 +317,10 @@ def _current_config(self):
             self._config = WorkspaceConfig.from_bytes(f.read())
         return self._config

+    def _raw_previous_config(self):
+        with self._ws.workspace.download(self.config_file) as f:
+            return str(f.read())
+
     def _name(self, name: str) -> str:
         return f"[{self._prefix.upper()}] {name}"

@@ -337,13 +342,17 @@ def _configure_inventory_database(self):
     def _configure(self):
         ws_file_url = self.notebook_link(self.config_file)
         try:
-            self._ws.workspace.get_status(self.config_file)
-            logger.info(f"UCX is already configured. See {ws_file_url}")
-            return
-        except DatabricksError as err:
+            if "version: 1" in self._raw_previous_config():
+                logger.info("old version detected, attempting to migrate to new config")
+                self._config = self._current_config
+                self._write_config(overwrite=True)
+                return
+            elif "version: 2" in self._raw_previous_config():
+                logger.info(f"UCX is already configured. See {ws_file_url}")
+                return
+        except NotFound as err:
             if err.error_code != "RESOURCE_DOES_NOT_EXIST":
                 raise err
-
         logger.info("Please answer a couple of questions to configure Unity Catalog migration")
         inventory_database = self._configure_inventory_database()

@@ -418,12 +427,12 @@ def warehouse_type(_):
             spark_conf=spark_conf_dict,
         )

-        self._write_config()
+        self._write_config(overwrite=False)
         msg = "Open config file in the browser and continue installing?"
         if self._prompts and self._question(msg, default="yes") == "yes":
             webbrowser.open(ws_file_url)

-    def _write_config(self):
+    def _write_config(self, overwrite):
         try:
             self._ws.workspace.get_status(self._install_folder)
         except DatabricksError as err:
@@ -434,7 +443,7 @@ def _write_config(self):

         config_bytes = yaml.dump(self._config.as_dict()).encode("utf8")
         logger.info(f"Creating configuration file: {self.config_file}")
-        self._ws.workspace.upload(self.config_file, config_bytes, format=ImportFormat.AUTO)
+        self._ws.workspace.upload(self.config_file, config_bytes, format=ImportFormat.AUTO, overwrite=overwrite)

     def _create_jobs(self):
         if not self._state.jobs:
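
Taken together, the reworked _configure() makes a three-way decision based on the raw text of any existing config file. The sketch below is a simplified restatement of that flow (error-code checks and the interactive prompts are elided), not a verbatim copy of the method:

# Simplified sketch of the new _configure() decision flow from the diff above.
def configure(installer):
    try:
        raw = installer._raw_previous_config()  # downloads the existing config.yml as text
        if "version: 1" in raw:
            # v1 config found: re-load it (the v1 -> v2 migration in config.py is applied
            # when the config is parsed) and write it back, overwriting the old file
            installer._config = installer._current_config
            installer._write_config(overwrite=True)
            return
        if "version: 2" in raw:
            # already on the current schema: nothing to do
            return
    except NotFound:
        # no config.yml yet: fall through to the fresh-install questionnaire,
        # which ends with _write_config(overwrite=False)
        pass
    # ... ask the configuration questions and write the new v2 config ...

Fresh installs and run_for_config() call _write_config(overwrite=False), so an existing file is never silently replaced; only the explicit v1 -> v2 migration path overwrites.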

tests/unit/test_install.py

Lines changed: 116 additions & 8 deletions
@@ -7,7 +7,7 @@
 import pytest
 import yaml
 from databricks.sdk.core import DatabricksError
-from databricks.sdk.errors import OperationFailed
+from databricks.sdk.errors import NotFound, OperationFailed
 from databricks.sdk.service import iam, jobs
 from databricks.sdk.service.compute import (
     GlobalInitScriptDetails,
@@ -41,8 +41,6 @@ def ws(mocker):
     ws.current_user.me = lambda: iam.User(user_name="[email protected]", groups=[iam.ComplexValue(display="admins")])
     ws.config.host = "https://foo"
     ws.config.is_aws = True
-    config_bytes = yaml.dump(WorkspaceConfig(inventory_database="a").as_dict()).encode("utf8")
-    ws.workspace.download = lambda _: io.BytesIO(config_bytes)
     ws.workspace.get_status = lambda _: ObjectInfo(object_id=123)
     ws.data_sources.list = lambda: [DataSource(id="bcd", warehouse_id="abc")]
     ws.warehouses.list = lambda **_: [EndpointInfo(id="abc", warehouse_type=EndpointInfoWarehouseType.PRO)]
@@ -55,6 +53,8 @@ def ws(mocker):


 def test_replace_clusters_for_integration_tests(ws):
+    config_bytes = yaml.dump(WorkspaceConfig(inventory_database="a").as_dict()).encode("utf8")
+    ws.workspace.download = lambda _: io.BytesIO(config_bytes)
     return_value = WorkspaceInstaller.run_for_config(
         ws,
         WorkspaceConfig(inventory_database="a"),
@@ -148,22 +148,127 @@ def not_found(_):
 workspace_start_path: /
 """,
         format=ImportFormat.AUTO,
+        overwrite=False,
+    )
+
+
+def test_migrate_from_v1(ws, mocker):
+    mocker.patch("builtins.input", return_value="yes")
+    mock_file = MagicMock()
+    mocker.patch("builtins.open", return_value=mock_file)
+    mocker.patch("base64.b64encode")
+
+    ws.current_user.me = lambda: iam.User(user_name="[email protected]", groups=[iam.ComplexValue(display="admins")])
+    ws.config.host = "https://foo"
+    ws.config.is_aws = True
+    config_bytes = b"""default_catalog: ucx_default
+groups:
+  auto: true
+  backup_group_prefix: db-temp-
+inventory_database: ucx
+log_level: INFO
+num_threads: 8
+version: 1
+warehouse_id: abc
+workspace_start_path: /
+"""
+    ws.workspace.download = lambda _: io.BytesIO(config_bytes)
+    ws.workspace.get_status = lambda _: ObjectInfo(object_id=123)
+    mocker.patch("builtins.input", return_value="42")
+
+    ws.warehouses.list = lambda **_: [
+        EndpointInfo(id="abc", warehouse_type=EndpointInfoWarehouseType.PRO, state=State.RUNNING)
+    ]
+    ws.cluster_policies.list = lambda: []
+
+    install = WorkspaceInstaller(ws)
+    install._choice = lambda _1, _2: "None (abc, PRO, RUNNING)"
+    install._configure()
+
+    ws.workspace.upload.assert_called_with(
+        "/Users/[email protected]/.ucx/config.yml",
+        b"""default_catalog: ucx_default
+inventory_database: ucx
+log_level: INFO
+num_threads: 8
+renamed_group_prefix: db-temp-
+version: 2
+warehouse_id: abc
+workspace_start_path: /
+""",
+        format=ImportFormat.AUTO,
+        overwrite=True,
+    )
+
+
+def test_migrate_from_v1_selected_groups(ws, mocker):
+    mocker.patch("builtins.input", return_value="yes")
+    mock_file = MagicMock()
+    mocker.patch("builtins.open", return_value=mock_file)
+    mocker.patch("base64.b64encode")
+
+    ws.current_user.me = lambda: iam.User(user_name="[email protected]", groups=[iam.ComplexValue(display="admins")])
+    ws.config.host = "https://foo"
+    ws.config.is_aws = True
+    config_bytes = b"""default_catalog: ucx_default
+groups:
+  backup_group_prefix: 'backup_baguette_prefix'
+  selected:
+  - '42'
+  - '100'
+inventory_database: '42'
+log_level: '42'
+num_threads: 42
+version: 1
+warehouse_id: abc
+workspace_start_path: /
+"""
+    ws.workspace.download = lambda _: io.BytesIO(config_bytes)
+    ws.workspace.get_status = lambda _: ObjectInfo(object_id=123)
+    mocker.patch("builtins.input", return_value="42")
+
+    ws.warehouses.list = lambda **_: [
+        EndpointInfo(id="abc", warehouse_type=EndpointInfoWarehouseType.PRO, state=State.RUNNING)
+    ]
+    ws.cluster_policies.list = lambda: []
+
+    install = WorkspaceInstaller(ws)
+    install._choice = lambda _1, _2: "None (abc, PRO, RUNNING)"
+    install._configure()
+
+    ws.workspace.upload.assert_called_with(
+        "/Users/[email protected]/.ucx/config.yml",
+        b"""default_catalog: ucx_default
+include_group_names:
+- '42'
+- '100'
+inventory_database: '42'
+log_level: '42'
+num_threads: 42
+renamed_group_prefix: backup_baguette_prefix
+version: 2
+warehouse_id: abc
+workspace_start_path: /
+""",
+        format=ImportFormat.AUTO,
+        overwrite=True,
     )


 def test_save_config_with_error(ws, mocker):
     def not_found(_):
-        raise DatabricksError(error_code="RAISED_FOR_TESTING")
+        raise NotFound(message="File not found")

     mocker.patch("builtins.input", return_value="42")

-    ws.workspace.get_status = not_found
+    ws.workspace.download = not_found
     ws.cluster_policies.list = lambda: []

     install = WorkspaceInstaller(ws)
-    with pytest.raises(DatabricksError) as e_info:
+    with pytest.raises(NotFound) as e_info:
         install._configure()
-    assert str(e_info.value.error_code) == "RAISED_FOR_TESTING"
+
+    assert str(e_info.value.args[0]) == "File not found"


 def test_save_config_auto_groups(ws, mocker):
@@ -201,6 +306,7 @@ def mock_question(text: str, *, default: str | None = None) -> str:
 workspace_start_path: /
 """,
         format=ImportFormat.AUTO,
+        overwrite=False,
     )


@@ -243,6 +349,7 @@ def mock_question(text: str, *, default: str | None = None) -> str:
 workspace_start_path: /
 """,
         format=ImportFormat.AUTO,
+        overwrite=False,
     )


@@ -307,6 +414,7 @@ def mock_choice_from_dict(text: str, choices: dict[str, Any]) -> Any:
 workspace_start_path: /
 """,
         format=ImportFormat.AUTO,
+        overwrite=False,
     )


@@ -330,7 +438,7 @@ def test_main_with_existing_conf_does_not_recreate_config(ws, mocker):
 renamed_group_prefix: '42'
 spark_conf:
   spark.databricks.hive.metastore.glueCatalog.enabled: 'true'
-version: 1
+version: 2
 warehouse_id: abc
 workspace_start_path: /
 """
