1919from databricks .labs .ucx .runtime import main
2020from databricks .labs .ucx .tasks import _TASKS
2121
22+ TAG_STEP = "step"
23+ TAG_APP = "App"
24+
25+ DEBUG_NOTEBOOK = """
26+ # Databricks notebook source
27+ # MAGIC %md
28+ # MAGIC # Debug companion for UCX installation (see [README]({readme_link}))
29+ # MAGIC
30+ # MAGIC Production runs are supposed to be triggered through the following jobs: {job_links}
31+ # MAGIC
32+ # MAGIC **This notebook is overwritten with each UCX update/(re)install.**
33+
34+ # COMMAND ----------
35+
36+ # MAGIC %pip install /Workspace{remote_wheel}
37+ dbutils.library.restartPython()
38+
39+ # COMMAND ----------
40+
41+ import logging
42+ from pathlib import Path
43+ from databricks.labs.ucx.__about__ import __version__
44+ from databricks.labs.ucx.config import MigrationConfig
45+ from databricks.labs.ucx import logger
46+ from databricks.sdk import WorkspaceClient
47+
48+ logger._install()
49+ logging.getLogger("databricks").setLevel("DEBUG")
50+
51+ cfg = MigrationConfig.from_file(Path("/Workspace{config_file}"))
52+ ws = WorkspaceClient()
53+
54+ print(__version__)
55+ """
56+
2257logger = logging .getLogger (__name__ )
2358
2459
2560class Installer :
26- def __init__ (self , ws : WorkspaceClient ):
61+ def __init__ (self , ws : WorkspaceClient , * , prefix : str = "ucx" , prompts : bool = True ):
2762 if "DATABRICKS_RUNTIME_VERSION" in os .environ :
2863 msg = "Installer is not supposed to be executed in Databricks Runtime"
2964 raise SystemExit (msg )
3065 self ._ws = ws
66+ self ._prefix = prefix
67+ self ._prompts = prompts
3168
3269 def run (self ):
3370 self ._configure ()
@@ -45,7 +82,7 @@ def _my_username(self):
4582
4683 @property
4784 def _install_folder (self ):
48- return f"/Users/{ self ._my_username } /.ucx "
85+ return f"/Users/{ self ._my_username } /.{ self . _prefix } "
4986
5087 @property
5188 def _config_file (self ):
@@ -60,14 +97,13 @@ def _current_config(self):
6097 return self ._config
6198
6299 def _configure (self ):
63- config_path = self ._config_file
64- ws_file_url = f"{ self ._ws .config .host } /#workspace{ config_path } "
100+ ws_file_url = self ._notebook_link (self ._config_file )
65101 try :
66- self ._ws .workspace .get_status (config_path )
102+ self ._ws .workspace .get_status (self . _config_file )
67103 logger .info (f"UCX is already configured. See { ws_file_url } " )
68- if self ._question ("Type 'yes' to open config file in the browser" ) == "yes" :
104+ if self ._prompts and self . _question ("Type 'yes' to open config file in the browser" ) == "yes" :
69105 webbrowser .open (ws_file_url )
70- return config_path
106+ return
71107 except DatabricksError as err :
72108 if err .error_code != "RESOURCE_DOES_NOT_EXIST" :
73109 raise err
@@ -84,41 +120,55 @@ def _configure(self):
84120 num_threads = int (self ._question ("Number of threads" , default = "8" )),
85121 )
86122
87- config_bytes = yaml .dump (self ._config .as_dict ()).encode ("utf8" )
88- self ._ws .workspace .upload (config_path , config_bytes , format = ImportFormat .AUTO )
89- logger .info (f"Created configuration file: { config_path } " )
90- if self ._question ("Open config file in the browser and continue installing?" , default = "yes" ) == "yes" :
123+ self ._write_config ()
124+ msg = "Open config file in the browser and continue installing?"
125+ if self ._prompts and self ._question (msg , default = "yes" ) == "yes" :
91126 webbrowser .open (ws_file_url )
92127
128+ def _write_config (self ):
129+ try :
130+ self ._ws .workspace .get_status (self ._install_folder )
131+ except DatabricksError as err :
132+ if err .error_code != "RESOURCE_DOES_NOT_EXIST" :
133+ raise err
134+ logger .debug (f"Creating install folder: { self ._install_folder } " )
135+ self ._ws .workspace .mkdirs (self ._install_folder )
136+
137+ config_bytes = yaml .dump (self ._config .as_dict ()).encode ("utf8" )
138+ logger .info (f"Creating configuration file: { self ._config_file } " )
139+ self ._ws .workspace .upload (self ._config_file , config_bytes , format = ImportFormat .AUTO )
140+
93141 def _create_jobs (self ):
94142 logger .debug (f"Creating jobs from tasks in { main .__name__ } " )
95- dbfs_path = self ._upload_wheel ()
96- deployed_steps = self ._deployed_steps ()
143+ remote_wheel = self ._upload_wheel ()
144+ self . _deployed_steps = self ._deployed_steps ()
97145 desired_steps = {t .workflow for t in _TASKS .values ()}
98146 for step_name in desired_steps :
99- settings = self ._job_settings (step_name , dbfs_path )
100- if step_name in deployed_steps :
101- job_id = deployed_steps [step_name ]
147+ settings = self ._job_settings (step_name , remote_wheel )
148+ if step_name in self . _deployed_steps :
149+ job_id = self . _deployed_steps [step_name ]
102150 logger .info (f"Updating configuration for step={ step_name } job_id={ job_id } " )
103151 self ._ws .jobs .reset (job_id , jobs .JobSettings (** settings ))
104152 else :
105153 logger .info (f"Creating new job configuration for step={ step_name } " )
106- deployed_steps [step_name ] = self ._ws .jobs .create (** settings ).job_id
154+ self . _deployed_steps [step_name ] = self ._ws .jobs .create (** settings ).job_id
107155
108- for step_name , job_id in deployed_steps .items ():
156+ for step_name , job_id in self . _deployed_steps .items ():
109157 if step_name not in desired_steps :
110158 logger .info (f"Removing job_id={ job_id } , as it is no longer needed" )
111159 self ._ws .jobs .delete (job_id )
112160
113- self ._create_readme (deployed_steps )
161+ self ._create_readme ()
162+ self ._create_debug (remote_wheel )
114163
115- def _create_readme (self , deployed_steps ):
164+ def _create_readme (self ):
116165 md = [
117166 "# UCX - The Unity Catalog Migration Assistant" ,
118167 "Here are the descriptions of jobs that trigger various stages of migration." ,
168+ f'To troubleshoot, see [debug notebook]({ self ._notebook_link (f"{ self ._install_folder } /DEBUG.py" )} ).' ,
119169 ]
120- for step_name , job_id in deployed_steps .items ():
121- md .append (f"## [[UCX ] { step_name } ]({ self ._ws .config .host } #job/{ job_id } )\n " )
170+ for step_name , job_id in self . _deployed_steps .items ():
171+ md .append (f"## [[{ self . _prefix . upper () } ] { step_name } ]({ self ._ws .config .host } #job/{ job_id } )\n " )
122172 for t in _TASKS .values ():
123173 if t .workflow != step_name :
124174 continue
@@ -129,12 +179,31 @@ def _create_readme(self, deployed_steps):
129179 intro = "\n " .join (preamble + [f"# MAGIC { line } " for line in md ])
130180 path = f"{ self ._install_folder } /README.py"
131181 self ._ws .workspace .upload (path , intro .encode ("utf8" ), overwrite = True )
132- url = f" { self ._ws . config . host } /#workspace { path } "
133- logger .info (f"Created notebook with job overview: { url } " )
182+ url = self ._notebook_link ( path )
183+ logger .info (f"Created README notebook with job overview: { url } " )
134184 msg = "Type 'yes' to open job overview in README notebook in your home directory"
135- if self ._question (msg ) == "yes" :
185+ if self ._prompts and self . _question (msg ) == "yes" :
136186 webbrowser .open (url )
137187
188+ def _create_debug (self , remote_wheel : str ):
189+ readme_link = self ._notebook_link (f"{ self ._install_folder } /README.py" )
190+ job_links = ", " .join (
191+ f"[[{ self ._prefix .upper ()} ] { step_name } ]({ self ._ws .config .host } #job/{ job_id } )"
192+ for step_name , job_id in self ._deployed_steps .items ()
193+ )
194+ path = f"{ self ._install_folder } /DEBUG.py"
195+ logger .debug (f"Created debug notebook: { self ._notebook_link (path )} " )
196+ self ._ws .workspace .upload (
197+ path ,
198+ DEBUG_NOTEBOOK .format (
199+ remote_wheel = remote_wheel , readme_link = readme_link , job_links = job_links , config_file = self ._config_file
200+ ).encode ("utf8" ),
201+ overwrite = True ,
202+ )
203+
204+ def _notebook_link (self , path : str ) -> str :
205+ return f"{ self ._ws .config .host } /#workspace{ path } "
206+
138207 @staticmethod
139208 def _question (text : str , * , default : str | None = None ) -> str :
140209 default_help = "" if default is None else f"\033 [36m (default: { default } )\033 [0m"
@@ -146,14 +215,20 @@ def _question(text: str, *, default: str | None = None) -> str:
146215 return default
147216 return res
148217
149- def _upload_wheel (self ):
218+ def _upload_wheel (self ) -> str :
150219 with tempfile .TemporaryDirectory () as tmp_dir :
151- wheel = self ._build_wheel (tmp_dir )
152- dbfs_path = f"{ self ._install_folder } /wheels/{ wheel .name } "
153- with wheel .open ("rb" ) as f :
154- logger .info (f"Uploading wheel to dbfs:{ dbfs_path } " )
155- self ._ws .dbfs .upload (dbfs_path , f , overwrite = True )
156- return dbfs_path
220+ local_wheel = self ._build_wheel (tmp_dir )
221+ remote_wheel = f"{ self ._install_folder } /wheels/{ local_wheel .name } "
222+ remote_dirname = os .path .dirname (remote_wheel )
223+ with local_wheel .open ("rb" ) as f :
224+ self ._ws .dbfs .mkdirs (remote_dirname )
225+ logger .info (f"Uploading wheel to dbfs:{ remote_wheel } " )
226+ self ._ws .dbfs .upload (remote_wheel , f , overwrite = True )
227+ with local_wheel .open ("rb" ) as f :
228+ self ._ws .workspace .mkdirs (remote_dirname )
229+ logger .info (f"Uploading wheel to /Workspace{ remote_wheel } " )
230+ self ._ws .workspace .upload (remote_wheel , f , overwrite = True , format = ImportFormat .AUTO )
231+ return remote_wheel
157232
158233 def _job_settings (self , step_name , dbfs_path ):
159234 config_file = f"/Workspace/{ self ._install_folder } /config.yml"
@@ -164,8 +239,8 @@ def _job_settings(self, step_name, dbfs_path):
164239 )
165240 tasks = sorted ([t for t in _TASKS .values () if t .workflow == step_name ], key = lambda _ : _ .name )
166241 return {
167- "name" : f"[UCX ] { step_name } " ,
168- "tags" : {"App" : "ucx" , "step" : step_name },
242+ "name" : f"[{ self . _prefix . upper () } ] { step_name } " ,
243+ "tags" : {TAG_APP : self . _prefix , TAG_STEP : step_name },
169244 "job_clusters" : self ._job_clusters ({t .job_cluster for t in tasks }),
170245 "email_notifications" : email_notifications ,
171246 "tasks" : [
@@ -210,6 +285,7 @@ def _job_clusters(self, names: set[str]):
210285 spec ,
211286 data_security_mode = compute .DataSecurityMode .LEGACY_TABLE_ACL ,
212287 spark_conf = {"spark.databricks.acl.sqlOnly" : "true" },
288+ num_workers = 1 , # ShowPermissionsCommand needs a worker
213289 custom_tags = {},
214290 ),
215291 )
@@ -270,13 +346,14 @@ def _cluster_node_type(self, spec: compute.ClusterSpec) -> compute.ClusterSpec:
270346
271347 def _deployed_steps (self ):
272348 deployed_steps = {}
349+ logger .debug (f"Fetching all jobs to determine already deployed steps for app={ self ._prefix } " )
273350 for j in self ._ws .jobs .list ():
274351 tags = j .settings .tags
275352 if tags is None :
276353 continue
277- if tags .get ("App" , None ) != "ucx" :
354+ if tags .get (TAG_APP , None ) != self . _prefix :
278355 continue
279- deployed_steps [tags .get ("step" , "_" )] = j .job_id
356+ deployed_steps [tags .get (TAG_STEP , "_" )] = j .job_id
280357 return deployed_steps
281358
282359
0 commit comments