|
| 1 | +import base64 |
1 | 2 | import logging |
| 3 | +from datetime import timedelta |
2 | 4 | from pathlib import Path |
| 5 | +from typing import Any |
3 | 6 |
|
4 | | -from databricks.labs.blueprint.tui import Prompts |
| 7 | +from databricks.sdk.service import compute, jobs |
| 8 | +from databricks.sdk.service.jobs import RunResultState |
| 9 | +from databricks.sdk.service.workspace import ExportFormat |
| 10 | +from databricks.sdk.errors import NotFound, ResourceDoesNotExist |
| 11 | +from databricks.sdk.retries import retried |
| 12 | +from databricks.sdk import WorkspaceClient |
5 | 13 |
|
6 | | -from databricks.labs.ucx.config import WorkspaceConfig |
| 14 | +from databricks.labs.blueprint.installation import Installation |
| 15 | +from databricks.labs.blueprint.tui import Prompts |
7 | 16 | from databricks.labs.lsql.backends import SqlBackend |
8 | 17 | from databricks.labs.lsql.dashboards import DashboardMetadata |
9 | 18 |
|
| 19 | +from databricks.labs.ucx.config import WorkspaceConfig |
| 20 | +from databricks.labs.ucx.assessment.export_html_template import EXPORT_HTML_TEMPLATE |
| 21 | + |
10 | 22 | logger = logging.getLogger(__name__) |
11 | 23 |
|
12 | 24 |
|
class AssessmentExporter:
    """Export UCX assessment query results.

    Three entry points are offered:

    * ``cli_export_csv_results`` - run the queries of a chosen assessment
      folder and write them to a ZIP archive of CSV files.
    * ``cli_export_xlsx_results`` - submit a one-off notebook job and copy the
      workbook it is expected to produce from the workspace to a local
      directory.
    * ``web_export_results`` - build the workbook in-process and return an
      HTML snippet embedding it as a base64 payload.
    """

    def __init__(self, ws: WorkspaceClient, sql_backend: SqlBackend, config: WorkspaceConfig):
        """Store the collaborators and derive the install and query locations.

        Args:
            ws: Workspace client used to submit jobs and download files.
            sql_backend: Backend used to execute the assessment queries.
            config: Supplies ``inventory_database`` and ``policy_id``.
        """
        self._ws = ws
        self._sql_backend = sql_backend
        self._config = config
        # NOTE(review): this interpolates the Installation object itself, so it
        # relies on its string form being the install folder path - confirm.
        # The value ends with "/", and the paths derived from it below add
        # another "/" separator.
        self._install_folder = f"/Workspace/{Installation.assume_global(ws, 'ucx')}/"
        self._base_path = Path(__file__).resolve().parents[3] / "labs/ucx/queries/assessment"

    @staticmethod
    def _export_to_excel(
        assessment_metadata: DashboardMetadata, sql_backend: SqlBackend, export_path: Path, writter: Any
    ):
        """Write one worksheet per query tile into an Excel workbook.

        Non-query tiles and queries that return no rows are skipped. A query
        that raises ``NotFound`` is logged as a warning and skipped so that the
        remaining tiles are still exported.

        Args:
            assessment_metadata: Dashboard metadata whose tiles are exported.
            sql_backend: Backend used to run each tile's query.
            export_path: Destination of the workbook.
            writter: Object providing ``ExcelWriter`` and ``DataFrame``
                (presumably the ``pandas`` module - confirm with callers).
        """
        with writter.ExcelWriter(export_path, engine='xlsxwriter') as writer:
            for tile in assessment_metadata.tiles:
                if not tile.metadata.is_query():
                    continue

                # Only the fetch is guarded: materialising the result inside
                # the ``try`` ensures a lazily raised NotFound is caught here.
                try:
                    rows = list(sql_backend.fetch(tile.content))
                except NotFound as e:
                    msg = (
                        str(e).split(" Verify", maxsplit=1)[0] + f" Export will continue without {tile.metadata.title}"
                    )
                    logger.warning(msg)
                    continue

                if not rows:
                    continue

                df = writter.DataFrame([row.asDict() for row in rows])

                # Excel limits worksheet names to 31 characters.
                sheet_name = str(tile.metadata.id)[:31]
                df.to_excel(writer, sheet_name=sheet_name, index=False)

    @retried(on=[ResourceDoesNotExist], timeout=timedelta(minutes=1))
    def _render_export(self, export_file_path: Path) -> str:
        """Render an HTML snippet for downloading the exported file.

        The file is downloaded from the workspace, base64-encoded and
        substituted into ``EXPORT_HTML_TEMPLATE`` together with the file name.
        The call is retried for up to one minute while the download raises
        ``ResourceDoesNotExist``.

        Args:
            export_file_path: Workspace path of the exported file.

        Returns:
            The formatted HTML template.
        """
        stream = self._ws.workspace.download(export_file_path.as_posix())
        try:
            binary_data = stream.read()
        finally:
            # Release the underlying response once the payload has been read.
            stream.close()
        b64_data = base64.b64encode(binary_data).decode('utf-8')

        return EXPORT_HTML_TEMPLATE.format(b64_data=b64_data, export_file_path_name=export_file_path.name)

    @staticmethod
    def _get_output_directory(prompts: Prompts) -> Path:
        """Ask the user for an existing path in which to save the results.

        The current working directory is offered as the default and the answer
        is only accepted when the path exists.
        """
        return Path(
            prompts.question(
                "Choose a path to save the UCX Assessment results",
                default=Path.cwd().as_posix(),
                validate=lambda p_: Path(p_).exists(),
            )
        )

    def _get_queries(self, assessment: str) -> DashboardMetadata:
        """Load the UCX queries to export, bound to the configured inventory database.

        Args:
            assessment: Name of the sub-folder below the assessment queries
                root; an empty value selects the root itself.

        Returns:
            Dashboard metadata in which the ``inventory`` database placeholder
            is replaced by ``config.inventory_database``.
        """
        queries_path = self._base_path / assessment if assessment else self._base_path
        return DashboardMetadata.from_path(queries_path).replace_database(
            database=self._config.inventory_database, database_to_replace="inventory"
        )

    def cli_export_csv_results(self, prompts: Prompts) -> Path:
        """Export the results of a chosen assessment to CSV files inside a ZIP archive.

        The user is prompted for the output directory and for which assessment
        sub-folder to export.

        Returns:
            The path reported by ``export_to_zipped_csv``.
        """
        results_directory = self._get_output_directory(prompts)

        query_choice = prompts.choice(
            "Choose which assessment results to export",
            [subdir.name for subdir in self._base_path.iterdir() if subdir.is_dir()],
        )

        results_path = self._get_queries(query_choice).export_to_zipped_csv(
            self._sql_backend, results_directory / f"export_{query_choice}_results.zip"
        )

        return results_path

    def cli_export_xlsx_results(self, prompts: Prompts) -> Path:
        """Submit the Excel export notebook as a job and download its workbook.

        The notebook is run on a new single-node cluster. When the run ends
        with ``RunResultState.SUCCESS`` the workbook is downloaded from the
        install folder and written to the directory chosen by the user.
        Otherwise a warning is logged and nothing is written.

        Returns:
            The local path of the workbook. The file only exists there when
            the run succeeded.
        """
        notebook_path = f"{self._install_folder}/EXPORT_ASSESSMENT_TO_EXCEL"
        export_file_name = Path(f"{self._install_folder}/ucx_assessment_main.xlsx")
        results_file = self._get_output_directory(prompts) / export_file_name.name

        run = self._ws.jobs.submit_and_wait(
            run_name="export-assessment-to-excel-experimental",
            tasks=[
                jobs.SubmitTask(
                    notebook_task=jobs.NotebookTask(notebook_path=notebook_path),
                    task_key="export-assessment",
                    new_cluster=compute.ClusterSpec(
                        data_security_mode=compute.DataSecurityMode.LEGACY_SINGLE_USER_STANDARD,
                        spark_conf={
                            "spark.databricks.cluster.profile": "singleNode",
                            "spark.master": "local[*]",
                        },
                        custom_tags={"ResourceClass": "SingleNode"},
                        num_workers=0,
                        policy_id=self._config.policy_id,
                        apply_policy_default_values=True,
                    ),
                )
            ],
        )

        if run.state and run.state.result_state == RunResultState.SUCCESS:
            binary_resp = self._ws.workspace.download(path=export_file_name.as_posix(), format=ExportFormat.SOURCE)
            try:
                results_file.write_bytes(binary_resp.read())
            finally:
                # Release the underlying response once the payload has been read.
                binary_resp.close()
        else:
            # Make the failure visible: the returned path does not exist in this case.
            result_state = run.state.result_state if run.state else None
            logger.warning(
                "Excel export job did not succeed (result state: %s); no file was written to %s",
                result_state,
                results_file,
            )

        return results_file

    def web_export_results(self, writer: Any) -> str:
        """Export the ``main`` assessment to Excel and return an HTML download snippet.

        Args:
            writer: Object providing ``ExcelWriter`` and ``DataFrame``
                (presumably the ``pandas`` module - confirm with callers).

        Returns:
            HTML produced by ``_render_export`` for the written workbook.
        """
        export_file_name = Path(f"{self._install_folder}/ucx_assessment_main.xlsx")
        assessment_main = self._get_queries("main")
        self._export_to_excel(assessment_main, self._sql_backend, export_file_name, writer)
        return self._render_export(export_file_name)
0 commit comments