
Commit 0898bd6

Added debug notebook companion to troubleshoot the installation (#191)
This PR ensures that tests are working correctly and adds a debug notebook to troubleshoot the app on interactive clusters.
1 parent 77cb4c3 commit 0898bd6
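
Besides the notebook itself, the installer gains two keyword-only constructor options: prefix, which namespaces the install folder and job tags, and prompts, which gates the browser-opening questions so automated tests do not block on input. A minimal sketch of a headless run, assuming only the constructor signature introduced in the diff below; the workspace client configuration is left to the environment:

    from databricks.sdk import WorkspaceClient

    from databricks.labs.ucx.install import Installer

    # prompts=False skips the "open in the browser?" questions, so nothing
    # blocks in CI; the first-run configuration questions are still asked.
    # prefix="ucx" is the default and controls both /Users/<me>/.<prefix>
    # and the "App" tag on the deployed jobs.
    ws = WorkspaceClient()
    installer = Installer(ws, prefix="ucx", prompts=False)
    installer.run()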

8 files changed (+210 −49 lines)

src/databricks/labs/ucx/install.py

Lines changed: 113 additions & 36 deletions
@@ -19,15 +19,52 @@
 from databricks.labs.ucx.runtime import main
 from databricks.labs.ucx.tasks import _TASKS
 
+TAG_STEP = "step"
+TAG_APP = "App"
+
+DEBUG_NOTEBOOK = """
+# Databricks notebook source
+# MAGIC %md
+# MAGIC # Debug companion for UCX installation (see [README]({readme_link}))
+# MAGIC
+# MAGIC Production runs are supposed to be triggered through the following jobs: {job_links}
+# MAGIC
+# MAGIC **This notebook is overwritten with each UCX update/(re)install.**
+
+# COMMAND ----------
+
+# MAGIC %pip install /Workspace{remote_wheel}
+dbutils.library.restartPython()
+
+# COMMAND ----------
+
+import logging
+from pathlib import Path
+from databricks.labs.ucx.__about__ import __version__
+from databricks.labs.ucx.config import MigrationConfig
+from databricks.labs.ucx import logger
+from databricks.sdk import WorkspaceClient
+
+logger._install()
+logging.getLogger("databricks").setLevel("DEBUG")
+
+cfg = MigrationConfig.from_file(Path("/Workspace{config_file}"))
+ws = WorkspaceClient()
+
+print(__version__)
+"""
+
 logger = logging.getLogger(__name__)
 
 
 class Installer:
-    def __init__(self, ws: WorkspaceClient):
+    def __init__(self, ws: WorkspaceClient, *, prefix: str = "ucx", prompts: bool = True):
         if "DATABRICKS_RUNTIME_VERSION" in os.environ:
             msg = "Installer is not supposed to be executed in Databricks Runtime"
             raise SystemExit(msg)
         self._ws = ws
+        self._prefix = prefix
+        self._prompts = prompts
 
     def run(self):
         self._configure()
@@ -45,7 +82,7 @@ def _my_username(self):
 
     @property
     def _install_folder(self):
-        return f"/Users/{self._my_username}/.ucx"
+        return f"/Users/{self._my_username}/.{self._prefix}"
 
     @property
     def _config_file(self):
@@ -60,14 +97,13 @@ def _current_config(self):
         return self._config
 
     def _configure(self):
-        config_path = self._config_file
-        ws_file_url = f"{self._ws.config.host}/#workspace{config_path}"
+        ws_file_url = self._notebook_link(self._config_file)
         try:
-            self._ws.workspace.get_status(config_path)
+            self._ws.workspace.get_status(self._config_file)
             logger.info(f"UCX is already configured. See {ws_file_url}")
-            if self._question("Type 'yes' to open config file in the browser") == "yes":
+            if self._prompts and self._question("Type 'yes' to open config file in the browser") == "yes":
                 webbrowser.open(ws_file_url)
-            return config_path
+            return
         except DatabricksError as err:
             if err.error_code != "RESOURCE_DOES_NOT_EXIST":
                 raise err
@@ -84,41 +120,55 @@ def _configure(self):
             num_threads=int(self._question("Number of threads", default="8")),
         )
 
-        config_bytes = yaml.dump(self._config.as_dict()).encode("utf8")
-        self._ws.workspace.upload(config_path, config_bytes, format=ImportFormat.AUTO)
-        logger.info(f"Created configuration file: {config_path}")
-        if self._question("Open config file in the browser and continue installing?", default="yes") == "yes":
+        self._write_config()
+        msg = "Open config file in the browser and continue installing?"
+        if self._prompts and self._question(msg, default="yes") == "yes":
             webbrowser.open(ws_file_url)
 
+    def _write_config(self):
+        try:
+            self._ws.workspace.get_status(self._install_folder)
+        except DatabricksError as err:
+            if err.error_code != "RESOURCE_DOES_NOT_EXIST":
+                raise err
+            logger.debug(f"Creating install folder: {self._install_folder}")
+            self._ws.workspace.mkdirs(self._install_folder)
+
+        config_bytes = yaml.dump(self._config.as_dict()).encode("utf8")
+        logger.info(f"Creating configuration file: {self._config_file}")
+        self._ws.workspace.upload(self._config_file, config_bytes, format=ImportFormat.AUTO)
+
     def _create_jobs(self):
         logger.debug(f"Creating jobs from tasks in {main.__name__}")
-        dbfs_path = self._upload_wheel()
-        deployed_steps = self._deployed_steps()
+        remote_wheel = self._upload_wheel()
+        self._deployed_steps = self._deployed_steps()
         desired_steps = {t.workflow for t in _TASKS.values()}
         for step_name in desired_steps:
-            settings = self._job_settings(step_name, dbfs_path)
-            if step_name in deployed_steps:
-                job_id = deployed_steps[step_name]
+            settings = self._job_settings(step_name, remote_wheel)
+            if step_name in self._deployed_steps:
+                job_id = self._deployed_steps[step_name]
                 logger.info(f"Updating configuration for step={step_name} job_id={job_id}")
                 self._ws.jobs.reset(job_id, jobs.JobSettings(**settings))
             else:
                 logger.info(f"Creating new job configuration for step={step_name}")
-                deployed_steps[step_name] = self._ws.jobs.create(**settings).job_id
+                self._deployed_steps[step_name] = self._ws.jobs.create(**settings).job_id
 
-        for step_name, job_id in deployed_steps.items():
+        for step_name, job_id in self._deployed_steps.items():
             if step_name not in desired_steps:
                 logger.info(f"Removing job_id={job_id}, as it is no longer needed")
                 self._ws.jobs.delete(job_id)
 
-        self._create_readme(deployed_steps)
+        self._create_readme()
+        self._create_debug(remote_wheel)
 
-    def _create_readme(self, deployed_steps):
+    def _create_readme(self):
         md = [
             "# UCX - The Unity Catalog Migration Assistant",
             "Here are the descriptions of jobs that trigger various stages of migration.",
+            f'To troubleshoot, see [debug notebook]({self._notebook_link(f"{self._install_folder}/DEBUG.py")}).',
         ]
-        for step_name, job_id in deployed_steps.items():
-            md.append(f"## [[UCX] {step_name}]({self._ws.config.host}#job/{job_id})\n")
+        for step_name, job_id in self._deployed_steps.items():
+            md.append(f"## [[{self._prefix.upper()}] {step_name}]({self._ws.config.host}#job/{job_id})\n")
             for t in _TASKS.values():
                 if t.workflow != step_name:
                     continue
@@ -129,12 +179,31 @@ def _create_readme(self, deployed_steps):
         intro = "\n".join(preamble + [f"# MAGIC {line}" for line in md])
         path = f"{self._install_folder}/README.py"
         self._ws.workspace.upload(path, intro.encode("utf8"), overwrite=True)
-        url = f"{self._ws.config.host}/#workspace{path}"
-        logger.info(f"Created notebook with job overview: {url}")
+        url = self._notebook_link(path)
+        logger.info(f"Created README notebook with job overview: {url}")
         msg = "Type 'yes' to open job overview in README notebook in your home directory"
-        if self._question(msg) == "yes":
+        if self._prompts and self._question(msg) == "yes":
             webbrowser.open(url)
 
+    def _create_debug(self, remote_wheel: str):
+        readme_link = self._notebook_link(f"{self._install_folder}/README.py")
+        job_links = ", ".join(
+            f"[[{self._prefix.upper()}] {step_name}]({self._ws.config.host}#job/{job_id})"
+            for step_name, job_id in self._deployed_steps.items()
+        )
+        path = f"{self._install_folder}/DEBUG.py"
+        logger.debug(f"Created debug notebook: {self._notebook_link(path)}")
+        self._ws.workspace.upload(
+            path,
+            DEBUG_NOTEBOOK.format(
+                remote_wheel=remote_wheel, readme_link=readme_link, job_links=job_links, config_file=self._config_file
+            ).encode("utf8"),
+            overwrite=True,
+        )
+
+    def _notebook_link(self, path: str) -> str:
+        return f"{self._ws.config.host}/#workspace{path}"
+
     @staticmethod
     def _question(text: str, *, default: str | None = None) -> str:
         default_help = "" if default is None else f"\033[36m (default: {default})\033[0m"
@@ -146,14 +215,20 @@ def _question(text: str, *, default: str | None = None) -> str:
             return default
         return res
 
-    def _upload_wheel(self):
+    def _upload_wheel(self) -> str:
         with tempfile.TemporaryDirectory() as tmp_dir:
-            wheel = self._build_wheel(tmp_dir)
-            dbfs_path = f"{self._install_folder}/wheels/{wheel.name}"
-            with wheel.open("rb") as f:
-                logger.info(f"Uploading wheel to dbfs:{dbfs_path}")
-                self._ws.dbfs.upload(dbfs_path, f, overwrite=True)
-            return dbfs_path
+            local_wheel = self._build_wheel(tmp_dir)
+            remote_wheel = f"{self._install_folder}/wheels/{local_wheel.name}"
+            remote_dirname = os.path.dirname(remote_wheel)
+            with local_wheel.open("rb") as f:
+                self._ws.dbfs.mkdirs(remote_dirname)
+                logger.info(f"Uploading wheel to dbfs:{remote_wheel}")
+                self._ws.dbfs.upload(remote_wheel, f, overwrite=True)
+            with local_wheel.open("rb") as f:
+                self._ws.workspace.mkdirs(remote_dirname)
+                logger.info(f"Uploading wheel to /Workspace{remote_wheel}")
+                self._ws.workspace.upload(remote_wheel, f, overwrite=True, format=ImportFormat.AUTO)
+            return remote_wheel
 
     def _job_settings(self, step_name, dbfs_path):
         config_file = f"/Workspace/{self._install_folder}/config.yml"
@@ -164,8 +239,8 @@ def _job_settings(self, step_name, dbfs_path):
         )
         tasks = sorted([t for t in _TASKS.values() if t.workflow == step_name], key=lambda _: _.name)
         return {
-            "name": f"[UCX] {step_name}",
-            "tags": {"App": "ucx", "step": step_name},
+            "name": f"[{self._prefix.upper()}] {step_name}",
+            "tags": {TAG_APP: self._prefix, TAG_STEP: step_name},
             "job_clusters": self._job_clusters({t.job_cluster for t in tasks}),
             "email_notifications": email_notifications,
             "tasks": [
@@ -210,6 +285,7 @@ def _job_clusters(self, names: set[str]):
                     spec,
                     data_security_mode=compute.DataSecurityMode.LEGACY_TABLE_ACL,
                     spark_conf={"spark.databricks.acl.sqlOnly": "true"},
+                    num_workers=1,  # ShowPermissionsCommand needs a worker
                     custom_tags={},
                 ),
             )
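
Taken together with the TAG_APP/TAG_STEP constants, every deployed job now carries a predictable name and tag pair, which is exactly what _deployed_steps matches on below. For a hypothetical step named "assessment" under the default prefix, the relevant fragment of the dict returned by _job_settings would be:

    # Illustrative fragment only; "assessment" is a made-up step name, and
    # only the name/tags keys of the settings dict are shown.
    expected = {
        "name": "[UCX] assessment",
        "tags": {"App": "ucx", "step": "assessment"},
    }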
@@ -270,13 +346,14 @@ def _cluster_node_type(self, spec: compute.ClusterSpec) -> compute.ClusterSpec:
 
     def _deployed_steps(self):
         deployed_steps = {}
+        logger.debug(f"Fetching all jobs to determine already deployed steps for app={self._prefix}")
         for j in self._ws.jobs.list():
            tags = j.settings.tags
             if tags is None:
                 continue
-            if tags.get("App", None) != "ucx":
+            if tags.get(TAG_APP, None) != self._prefix:
                 continue
-            deployed_steps[tags.get("step", "_")] = j.job_id
+            deployed_steps[tags.get(TAG_STEP, "_")] = j.job_id
         return deployed_steps
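
To see what the installer actually writes to DEBUG.py, here is a sketch of rendering the DEBUG_NOTEBOOK template by hand; every value below is a made-up placeholder, not something the commit pins down:

    from databricks.labs.ucx.install import DEBUG_NOTEBOOK

    # All paths, hosts and job ids here are illustrative placeholders.
    source = DEBUG_NOTEBOOK.format(
        remote_wheel="/Users/me@example.com/.ucx/wheels/databricks_labs_ucx-0.0.0-py3-none-any.whl",
        readme_link="https://example.cloud.databricks.com/#workspace/Users/me@example.com/.ucx/README.py",
        job_links="[[UCX] assessment](https://example.cloud.databricks.com#job/123)",
        config_file="/Users/me@example.com/.ucx/config.yml",
    )
    # The rendered text is a source-format notebook: it %pip-installs the
    # wheel from /Workspace, restarts Python, then loads the config and
    # prints the installed UCX version.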

src/databricks/labs/ucx/providers/mixins/fixtures.py

Lines changed: 1 addition & 0 deletions
@@ -354,6 +354,7 @@ def create(
             kwargs["roles"] = _scim_values(roles)
         if entitlements is not None:
             kwargs["entitlements"] = _scim_values(entitlements)
+        # TODO: REQUEST_LIMIT_EXCEEDED: GetUserPermissionsRequest RPC token bucket limit has been exceeded.
         return interface.create(**kwargs)
 
     yield from factory(name, create, lambda item: interface.delete(item.id))

src/databricks/labs/ucx/runtime.py

Lines changed: 0 additions & 5 deletions
@@ -5,15 +5,10 @@
 from databricks.sdk import WorkspaceClient
 
 from databricks.labs.ucx.config import MigrationConfig
-from databricks.labs.ucx.logger import _install
 from databricks.labs.ucx.tasks import task, trigger
 from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit
 from databricks.labs.ucx.toolkits.table_acls import TaclToolkit
 
-_install()
-
-logging.root.setLevel("INFO")
-
 logger = logging.getLogger(__name__)
 
 
src/databricks/labs/ucx/tacl/_internal.py

Lines changed: 7 additions & 2 deletions
@@ -27,9 +27,11 @@ def __init__(self, ws: WorkspaceClient, warehouse_id):
         self._warehouse_id = warehouse_id
 
     def execute(self, sql):
+        logger.debug(f"[api][execute] {sql}")
         self._sql.execute(self._warehouse_id, sql)
 
     def fetch(self, sql) -> Iterator[any]:
+        logger.debug(f"[api][fetch] {sql}")
         return self._sql.execute_fetch_all(self._warehouse_id, sql)
 
 
@@ -43,9 +45,11 @@ def __init__(self):
         self._spark = SparkSession.builder.getOrCreate()
 
     def execute(self, sql):
+        logger.debug(f"[spark][execute] {sql}")
         self._spark.sql(sql)
 
     def fetch(self, sql) -> Iterator[any]:
+        logger.debug(f"[spark][fetch] {sql}")
         return self._spark.sql(sql).collect()
 
 
@@ -160,6 +164,7 @@ def _snapshot(self, klass, fetcher, loader) -> list[any]:
         logger.debug(f"[{self._full_name}] crawling new batch for {self._table}")
         loaded_records = list(loader())
         if len(loaded_records) > 0:
+            logger.debug(f"[{self._full_name}] found {len(loaded_records)} new records for {self._table}")
             self._append_records(klass, loaded_records)
             loaded = True
 
@@ -230,10 +235,10 @@ def _append_records(self, klass, records: Iterator[any]):
             logger.debug(f"[{self._full_name}] not found. creating")
             schema = ", ".join(f"{f.name} {self._field_type(f)}" for f in fields)
             try:
-                ddl = f"CREATE TABLE {self._full_name} ({schema}) USING DELTA"
-                self._exec(ddl)
+                self._exec(f"CREATE TABLE {self._full_name} ({schema}) USING DELTA")
             except Exception as e:
                 schema_not_found = "SCHEMA_NOT_FOUND" in str(e)
                 if not schema_not_found:
                     raise e
+                logger.debug(f"[{self._catalog}.{self._schema}] not found. creating")
                 self._exec(f"CREATE SCHEMA {self._catalog}.{self._schema}")
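
The new logger.debug calls in the SQL backends stay silent unless the databricks logger is raised to DEBUG. The generated DEBUG.py does this during setup, and the same two lines work in any notebook or test; a sketch of the setup the debug notebook performs:

    import logging

    from databricks.labs.ucx import logger

    # Install the UCX log formatter, then raise the level so the
    # [api][execute]/[spark][fetch] statements above become visible.
    logger._install()
    logging.getLogger("databricks").setLevel("DEBUG")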

src/databricks/labs/ucx/tacl/grants.py

Lines changed: 5 additions & 1 deletion
@@ -166,7 +166,11 @@ def _crawl(self, catalog: str, database: str) -> list[Grant]:
                 tasks.append(partial(fn, view=table.name))
             else:
                 tasks.append(partial(fn, table=table.name))
-        return [grant for grants in ThreadedExecution.gather("listing grants", tasks) for grant in grants]
+        return [
+            grant
+            for grants in ThreadedExecution.gather(f"listing grants for {catalog}.{database}", tasks)
+            for grant in grants
+        ]
 
     def _grants(
         self,

src/databricks/labs/ucx/tacl/tables.py

Lines changed: 1 addition & 1 deletion
@@ -119,7 +119,7 @@ def _crawl(self, catalog: str, database: str) -> list[Table]:
         tasks = []
         for _, table, _is_tmp in self._fetch(f"SHOW TABLES FROM {catalog}.{database}"):
             tasks.append(partial(self._describe, catalog, database, table))
-        return ThreadedExecution.gather("listing tables", tasks)
+        return ThreadedExecution.gather(f"listing tables in {catalog}.{database}", tasks)
 
     def _describe(self, catalog: str, database: str, table: str) -> Table:
         """Fetches metadata like table type, data format, external table location,

src/databricks/labs/ucx/tasks.py

Lines changed: 3 additions & 0 deletions
@@ -5,6 +5,7 @@
 from pathlib import Path
 
 from databricks.labs.ucx.config import MigrationConfig
+from databricks.labs.ucx.logger import _install
 
 _TASKS: dict[str, "Task"] = {}
 
@@ -69,6 +70,8 @@ def trigger(*argv):
     current_task = _TASKS[task_name]
     print(current_task.doc)
 
+    _install()
+
     cfg = MigrationConfig.from_file(Path(args["config"]))
     logging.getLogger("databricks").setLevel(cfg.log_level)
 