|
116 | 116 | f'--parent_run_id=' + dbutils.widgets.get('parent_run_id')) |
117 | 117 | """ |
118 | 118 |
|
# Source of the notebook uploaded as EXPORT_ASSESSMENT_TO_EXCEL.py.
#
# This is a ``str.format`` template: ``{remote_wheel}``, ``{config_file}``,
# ``{workspace_host}`` and ``{workspace_id}`` are substituted by
# ``_create_export``. Every literal brace that must survive into the notebook
# (the f-string placeholders and the CSS rules) is therefore doubled.
# The notebook's own docstrings and HTML literal use ''' so that they do not
# terminate this triple-double-quoted string.
EXPORT_TO_EXCEL_NOTEBOOK = """# Databricks notebook source
# MAGIC %md
# MAGIC ##### Exporter of UCX assessment results
# MAGIC ##### Instructions:
# MAGIC 1. Execute using an all-purpose cluster with Databricks Runtime 14 or higher.
# MAGIC 1. Hit **Run all** button and wait for completion.
# MAGIC 1. Go to the bottom of the notebook and click the **Download Results** button.
# MAGIC
# MAGIC ##### Important:
# MAGIC Please note that this is only meant to serve as example code.
# MAGIC
# MAGIC Example code developed by **Databricks Shared Technical Services team**.

# COMMAND ----------

# DBTITLE 1,Installing Packages
# MAGIC %pip install {remote_wheel} -qqq
# MAGIC %pip install xlsxwriter -qqq
# MAGIC dbutils.library.restartPython()

# COMMAND ----------

# DBTITLE 1,Libraries Import and Setting UCX
import os
import logging
import threading
import shutil
from pathlib import Path
from threading import Lock
from functools import partial

import pandas as pd
import xlsxwriter

from databricks.sdk.config import with_user_agent_extra
from databricks.labs.blueprint.logger import install_logger
from databricks.labs.blueprint.parallel import Threads
from databricks.labs.lsql.dashboards import Dashboards
from databricks.labs.lsql.lakeview.model import Dataset
from databricks.labs.ucx.contexts.workflow_task import RuntimeContext

# ctx
install_logger()
with_user_agent_extra("cmd", "export-assessment")
named_parameters = dict(config="/Workspace{config_file}")
ctx = RuntimeContext(named_parameters)
lock = Lock()

# COMMAND ----------

# DBTITLE 1,Assessment Export
FILE_NAME = "ucx_assessment_main.xlsx"
TMP_PATH = f"/Workspace{{ctx.installation.install_folder()}}/tmp/"
DOWNLOAD_PATH = "/dbfs/FileStore/excel-export"


def _cleanup() -> None:
    '''Move the temporary results file to the download path and clean up the temp directory.'''
    shutil.move(
        os.path.join(TMP_PATH, FILE_NAME),
        os.path.join(DOWNLOAD_PATH, FILE_NAME),
    )
    shutil.rmtree(TMP_PATH)


def _prepare_directories() -> None:
    '''Ensure that the necessary directories exist.'''
    os.makedirs(TMP_PATH, exist_ok=True)
    os.makedirs(DOWNLOAD_PATH, exist_ok=True)


def _to_excel(dataset: Dataset, writer: pd.ExcelWriter) -> None:
    '''Execute a SQL query and write the result to an Excel sheet.'''
    # Sheet names are cut to 31 characters before being handed to the writer.
    worksheet_name = dataset.display_name[:31]
    df = spark.sql(dataset.query).toPandas()
    # The same writer is passed to every export task, so writes are serialized.
    with lock:
        df.to_excel(writer, sheet_name=worksheet_name, index=False)


def _render_export() -> None:
    '''Render an HTML link for downloading the results.'''
    html_content = '''
    <style>@font-face{{font-family:'DM Sans';src:url(https://cdn.bfldr.com/9AYANS2F/at/p9qfs3vgsvnp5c7txz583vgs/dm-sans-regular.ttf?auto=webp&format=ttf) format('truetype');font-weight:400;font-style:normal}}body{{font-family:'DM Sans',Arial,sans-serif}}.export-container{{text-align:center;margin-top:20px}}.export-container h2{{color:#1B3139;font-size:24px;margin-bottom:20px}}.export-container a{{display:inline-block;padding:12px 25px;background-color:#1B3139;color:#fff;text-decoration:none;border-radius:4px;font-size:18px;font-weight:500;transition:background-color 0.3s ease,transform 0.3s ease}}.export-container a:hover{{background-color:#FF3621;transform:translateY(-2px)}}</style>
    <div class="export-container"><h2>Export Results</h2><a href='{workspace_host}/files/excel-export/ucx_assessment_main.xlsx?o={workspace_id}' target='_blank' download>Download Results</a></div>

    '''
    displayHTML(html_content)


def export_results() -> None:
    '''Main method to export results to an Excel file.'''
    _prepare_directories()

    dashboard_path = (
        Path(ctx.installation.install_folder())
        / "dashboards/[UCX] UCX Assessment (Main).lvdash.json"
    )
    dashboard = Dashboards(ctx.workspace_client)
    dashboard_datasets = dashboard.get_dashboard(dashboard_path).datasets
    try:
        # Build the path from the same constants _cleanup() uses, so the file
        # that is written is the file that is moved.
        target = os.path.join(TMP_PATH, FILE_NAME)
        with pd.ExcelWriter(target, engine="xlsxwriter") as writer:
            tasks = [partial(_to_excel, dataset, writer) for dataset in dashboard_datasets]
            Threads.strict("exporting", tasks)
        # The workbook is moved only after the writer context has been closed.
        _cleanup()
        _render_export()
    except Exception as e:
        # Best effort: report the failure in the cell output instead of failing the run.
        print(f"Error exporting results: {{e}}")

# COMMAND ----------

# DBTITLE 1,Data Export
export_results()
"""
| 235 | + |
119 | 236 |
|
120 | 237 | class DeployedWorkflows: |
121 | 238 | def __init__(self, ws: WorkspaceClient, install_state: InstallState): |
@@ -502,6 +619,7 @@ def create_jobs(self) -> None: |
502 | 619 | self.remove_jobs(keep=desired_workflows) |
503 | 620 | self._install_state.save() |
504 | 621 | self._create_debug(remote_wheels) |
| 622 | + self._create_export(remote_wheels) |
505 | 623 | self._create_readme() |
506 | 624 |
|
507 | 625 | def remove_jobs(self, *, keep: set[str] | None = None) -> None: |
@@ -840,6 +958,16 @@ def _create_debug(self, remote_wheels: list[str]): |
840 | 958 | ).encode("utf8") |
841 | 959 | self._installation.upload('DEBUG.py', content) |
842 | 960 |
|
| 961 | + def _create_export(self, remote_wheels: list[str]): |
| 962 | + remote_wheels_str = " ".join(remote_wheels) |
| 963 | + content = EXPORT_TO_EXCEL_NOTEBOOK.format( |
| 964 | + remote_wheel=remote_wheels_str, |
| 965 | + config_file=self._config_file, |
| 966 | + workspace_host=self._ws.config.host, |
| 967 | + workspace_id=self._ws.get_workspace_id(), |
| 968 | + ).encode("utf8") |
| 969 | + self._installation.upload('EXPORT_ASSESSMENT_TO_EXCEL.py', content) |
| 970 | + |
843 | 971 |
|
844 | 972 | class MaxedStreamHandler(logging.StreamHandler): |
845 | 973 |
|
|
0 commit comments