Skip to content

Commit 79d730c

Browse files
authored
Added DBSQL queries & dashboard migration (#1532)
1 parent 7f0b778 commit 79d730c

File tree

11 files changed

+453
-19
lines changed

11 files changed

+453
-19
lines changed

README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ See [contributing instructions](CONTRIBUTING.md) to help improve this project.
6767
* [`alias` command](#alias-command)
6868
* [Code migration commands](#code-migration-commands)
6969
* [`migrate-local-code` command](#migrate-local-code-command)
70+
* [`migrate-dbsql-dashboards` command](#migrate-dbsql-dashboards-command)
71+
* [`revert-dbsql-dashboards` command](#revert-dbsql-dashboards-command)
7072
* [Cross-workspace installations](#cross-workspace-installations)
7173
* [`sync-workspace-info` command](#sync-workspace-info-command)
7274
* [`manual-workspace-info` command](#manual-workspace-info-command)
@@ -929,6 +931,38 @@ the automated transformation process.
929931

930932
[[back to top](#databricks-labs-ucx)]
931933

934+
## `migrate-dbsql-dashboards` command
935+
936+
```text
937+
databricks labs ucx migrate-dbsql-dashboards [--dashboard-id <dashboard-id>]
938+
```
939+
940+
**(Experimental)** Once [table migration](#table-migration-workflow) is complete, you can run this command to
941+
migrate all Databricks SQL dashboards in the workspace. At this moment, this command is highly experimental and discards
942+
formatting during the automated transformation process.
943+
944+
This command tags dashboards & queries that have been migrated with the `Migrated by UCX` tag. The original queries are
945+
also backed up in the ucx installation folder, to allow for easy rollback (see [`revert-dbsql-dashboards` command](#revert-dbsql-dashboards-command)).
946+
947+
This command can be run with `--dashboard-id` flag to migrate a specific dashboard.
948+
949+
This command is incremental and can be run multiple times to migrate new dashboards.
950+
951+
[[back to top](#databricks-labs-ucx)]
952+
953+
## `revert-dbsql-dashboards` command
954+
955+
```text
956+
databricks labs ucx revert-dbsql-dashboards [--dashboard-id <dashboard-id>]
957+
```
958+
959+
**(Experimental)** This command reverts the migration of Databricks SQL dashboards in the workspace, after
960+
`migrate-dbsql-dashboards` command is executed.
961+
962+
This command can be run with `--dashboard-id` flag to revert a specific dashboard.
963+
964+
[[back to top](#databricks-labs-ucx)]
965+
932966
# Cross-workspace installations
933967

934968
When installing UCX across multiple workspaces, administrators need to keep UCX configurations in sync.

labs.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,15 @@ commands:
208208
description: |
209209
Trigger the migrate-tables workflow and, optionally, migrate-external-hiveserde-tables-in-place-experimental
210210
workflow and migrate-external-tables-ctas workflow.
211+
212+
- name: migrate-dbsql-dashboards
213+
description: Migrate DBSQL dashboards by replacing legacy HMS tables in DBSQL queries with the corresponding new UC tables.
214+
flags:
215+
- name: dashboard-id
216+
description: (Optional) DBSQL dashboard ID to migrate. If no dashboard ID is provided, all DBSQL dashboards in the workspace will be migrated.
217+
218+
- name: revert-dbsql-dashboards
219+
description: Revert DBSQL dashboards that have been migrated to their original state before the migration.
220+
flags:
221+
- name: dashboard-id
222+
description: (Optional) DBSQL dashboard ID to revert. If no dashboard ID is provided, all migrated DBSQL dashboards in the workspace will be reverted.

src/databricks/labs/ucx/cli.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,5 +455,19 @@ def migrate_tables(w: WorkspaceClient, prompts: Prompts, *, ctx: WorkspaceContex
455455
deployed_workflows.run_workflow("migrate-external-tables-ctas")
456456

457457

458+
@ucx.command
459+
def migrate_dbsql_dashboards(w: WorkspaceClient, dashboard_id: str | None = None):
460+
"""Migrate table references in DBSQL Dashboard queries"""
461+
ctx = WorkspaceContext(w)
462+
ctx.redash.migrate_dashboards(dashboard_id)
463+
464+
465+
@ucx.command
466+
def revert_dbsql_dashboards(w: WorkspaceClient, dashboard_id: str | None = None):
467+
"""Revert migrated DBSQL Dashboard queries back to their original state"""
468+
ctx = WorkspaceContext(w)
469+
ctx.redash.revert_dashboards(dashboard_id)
470+
471+
458472
if __name__ == "__main__":
459473
ucx()

src/databricks/labs/ucx/contexts/application.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist
4646
from databricks.labs.ucx.source_code.site_packages import SitePackageResolver, SitePackages
4747
from databricks.labs.ucx.source_code.languages import Languages
48+
from databricks.labs.ucx.source_code.redash import Redash
4849
from databricks.labs.ucx.workspace_access import generic, redash
4950
from databricks.labs.ucx.workspace_access.groups import GroupManager
5051
from databricks.labs.ucx.workspace_access.manager import PermissionManager
@@ -403,6 +404,14 @@ def workflow_linter(self):
403404
MigrationIndex([]), # TODO: bring back self.tables_migrator.index()
404405
)
405406

407+
@cached_property
408+
def redash(self):
409+
return Redash(
410+
self.migration_status_refresher.index(),
411+
self.workspace_client,
412+
self.installation,
413+
)
414+
406415

407416
class CliContext(GlobalContext, abc.ABC):
408417
@cached_property

src/databricks/labs/ucx/mixins/fixtures.py

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@
4343
GetResponse,
4444
ObjectTypePlural,
4545
Query,
46-
QueryInfo,
46+
Dashboard,
47+
WidgetOptions,
48+
WidgetPosition,
4749
)
4850
from databricks.sdk.service.workspace import ImportFormat, Language
4951

@@ -1122,13 +1124,14 @@ def remove(udf_info: FunctionInfo):
11221124

11231125
@pytest.fixture
11241126
def make_query(ws, make_table, make_random):
1125-
def create() -> QueryInfo:
1127+
def create() -> Query:
11261128
table = make_table()
11271129
query_name = f"ucx_query_Q{make_random(4)}"
11281130
query = ws.queries.create(
1129-
name=f"{query_name}",
1131+
name=query_name,
11301132
description="TEST QUERY FOR UCX",
11311133
query=f"SELECT * FROM {table.schema_name}.{table.name}",
1134+
tags=["original_query_tag"],
11321135
)
11331136
logger.info(f"Query Created {query_name}: {ws.config.host}/sql/editor/{query.id}")
11341137
return query
@@ -1264,3 +1267,49 @@ def remove(path: str):
12641267
ws.dbfs.delete(path, recursive=True)
12651268

12661269
yield from factory("make_storage_dir", create, remove)
1270+
1271+
1272+
@pytest.fixture
1273+
def make_dashboard(ws, make_random, make_query):
1274+
def create() -> Dashboard:
1275+
query = make_query()
1276+
viz = ws.query_visualizations.create(
1277+
type="table",
1278+
query_id=query.id,
1279+
options={
1280+
"itemsPerPage": 1,
1281+
"condensed": True,
1282+
"withRowNumber": False,
1283+
"version": 2,
1284+
"columns": [
1285+
{"name": "id", "title": "id", "allowSearch": True},
1286+
],
1287+
},
1288+
)
1289+
1290+
dashboard_name = f"ucx_D{make_random(4)}"
1291+
dashboard = ws.dashboards.create(name=dashboard_name, tags=["original_dashboard_tag"])
1292+
ws.dashboard_widgets.create(
1293+
dashboard_id=dashboard.id,
1294+
visualization_id=viz.id,
1295+
width=1,
1296+
options=WidgetOptions(
1297+
title="",
1298+
position=WidgetPosition(
1299+
col=0,
1300+
row=0,
1301+
size_x=3,
1302+
size_y=3,
1303+
),
1304+
),
1305+
)
1306+
logger.info(f"Dashboard Created {dashboard_name}: {ws.config.host}/sql/dashboards/{dashboard.id}")
1307+
return dashboard
1308+
1309+
def remove(dashboard: Dashboard):
1310+
try:
1311+
ws.dashboards.delete(dashboard_id=dashboard.id)
1312+
except RuntimeError as e:
1313+
logger.info(f"Can't delete dashboard {e}")
1314+
1315+
yield from factory("dashboard", create, remove)

src/databricks/labs/ucx/source_code/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def apply(self, code: str) -> str: ...
8686

8787
# The default catalog and schema to use when they are not specified in a table reference
8888
# See: https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-qry-select-usedb.html
89+
DEFAULT_CATALOG = 'hive_metastore'
8990
DEFAULT_SCHEMA = 'default'
9091

9192

@@ -97,10 +98,12 @@ class CurrentSessionState:
9798
This class can be used to track various aspects of a session, such as the current schema.
9899
99100
Attributes:
101+
catalog (str): The current catalog of the session. If not provided, it defaults to 'DEFAULT_CATALOG'.
100102
schema (str): The current schema of the session. If not provided, it defaults to 'DEFAULT_SCHEMA'.
101103
"""
102104

103105
schema: str = DEFAULT_SCHEMA
106+
catalog: str = DEFAULT_CATALOG
104107

105108

106109
class SequentialLinter(Linter):

src/databricks/labs/ucx/source_code/queries.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def __init__(self, index: MigrationIndex, session_state: CurrentSessionState):
3333
table -> Table(catalog='', db='', this='table')
3434
"""
3535
self._index: MigrationIndex = index
36-
self._session_state: CurrentSessionState = session_state if session_state else CurrentSessionState()
36+
self._session_state: CurrentSessionState = session_state
3737

3838
def name(self) -> str:
3939
return 'table-migrate'
Lines changed: 121 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,130 @@
1+
import logging
2+
from collections.abc import Iterator
3+
from dataclasses import replace
4+
5+
from databricks.labs.blueprint.installation import Installation
6+
17
from databricks.sdk import WorkspaceClient
2-
from databricks.sdk.service.sql import Query
8+
from databricks.sdk.service.sql import Query, Dashboard
9+
from databricks.sdk.errors.platform import DatabricksError
310

4-
from databricks.labs.ucx.source_code.base import Fixer
11+
from databricks.labs.ucx.hive_metastore.migration_status import MigrationIndex
12+
from databricks.labs.ucx.source_code.base import CurrentSessionState
13+
from databricks.labs.ucx.source_code.queries import FromTable
14+
15+
logger = logging.getLogger(__name__)
516

617

718
class Redash:
    """Migrate and revert table references in the queries behind DBSQL dashboards.

    Dashboards and queries that have been processed are marked with ``MIGRATED_TAG``.
    Before a query is rewritten it is saved through the installation under
    ``backup/queries/<query id>.json``; ``revert_dashboards`` restores queries from
    those backups.
    """

    MIGRATED_TAG = "Migrated by UCX"

    def __init__(self, index: MigrationIndex, ws: WorkspaceClient, installation: Installation):
        self._index = index
        self._ws = ws
        self._installation = installation

    def migrate_dashboards(self, dashboard_id: str | None = None):
        """Rewrite the queries of one dashboard, or of all dashboards when no id is given.

        Dashboards already carrying ``MIGRATED_TAG`` are skipped, which makes repeated
        runs incremental.
        """
        for dashboard in self._list_dashboards(dashboard_id):
            assert dashboard.id is not None
            if dashboard.tags is not None and self.MIGRATED_TAG in dashboard.tags:
                logger.debug(f"Dashboard {dashboard.name} already migrated by UCX")
                continue
            for query in self.get_queries_from_dashboard(dashboard):
                self._fix_query(query)
            self._ws.dashboards.update(dashboard.id, tags=self._get_migrated_tags(dashboard.tags))

    def revert_dashboards(self, dashboard_id: str | None = None):
        """Restore the queries of one dashboard, or of all dashboards when no id is given.

        Only dashboards carrying ``MIGRATED_TAG`` are touched; the tag is removed afterwards.
        """
        for dashboard in self._list_dashboards(dashboard_id):
            assert dashboard.id is not None
            if dashboard.tags is None or self.MIGRATED_TAG not in dashboard.tags:
                logger.debug(f"Dashboard {dashboard.name} was not migrated by UCX")
                continue
            for query in self.get_queries_from_dashboard(dashboard):
                self._revert_query(query)
            self._ws.dashboards.update(dashboard.id, tags=self._get_original_tags(dashboard.tags))

    def _list_dashboards(self, dashboard_id: str | None) -> list[Dashboard]:
        """Return the requested dashboard, or every dashboard; an empty list on API errors."""
        try:
            if dashboard_id is None:
                return list(self._ws.dashboards.list())
            return [self._ws.dashboards.get(dashboard_id)]
        except DatabricksError as e:
            logger.error(f"Cannot list dashboards: {e}")
            return []

    def _fix_query(self, query: Query):
        """Back up a query, rewrite its table references and tag it as migrated."""
        assert query.id is not None
        assert query.query is not None
        # query already migrated
        if query.tags is not None and self.MIGRATED_TAG in query.tags:
            return
        # backup the query; without a backup the change could not be reverted,
        # so a failed backup skips this query instead of aborting the whole run
        try:
            self._installation.save(query, filename=f'backup/queries/{query.id}.json')
        except DatabricksError as e:
            logger.warning(f"Cannot back up {query.name}: {e}")
            return
        from_table = FromTable(self._index, self._get_session_state(query))
        new_query = from_table.apply(query.query)
        try:
            self._ws.queries.update(
                query.id,
                query=new_query,
                tags=self._get_migrated_tags(query.tags),
            )
        except DatabricksError as e:
            logger.warning(f"Cannot upgrade {query.name}: {e}")

    @staticmethod
    def _get_session_state(query: Query) -> CurrentSessionState:
        """Build the session state from the catalog/schema set in the query options, if any."""
        session_state = CurrentSessionState()
        if query.options is None:
            return session_state
        if query.options.catalog:
            session_state = replace(session_state, catalog=query.options.catalog)
        if query.options.schema:
            session_state = replace(session_state, schema=query.options.schema)
        return session_state

    def _revert_query(self, query: Query):
        """Restore a migrated query's text and tags from its backup file."""
        assert query.id is not None
        assert query.query is not None
        if query.tags is None or self.MIGRATED_TAG not in query.tags:
            logger.debug(f"Query {query.name} was not migrated by UCX")
            return
        try:
            # loading sits inside the try so that a missing or unreadable backup
            # skips this query rather than aborting the remaining reverts
            backup_query = self._installation.load(Query, filename=f'backup/queries/{query.id}.json')
            self._ws.queries.update(
                query.id,
                query=backup_query.query,
                tags=self._get_original_tags(backup_query.tags),
            )
        except DatabricksError as e:
            logger.warning(f"Cannot restore {query.name} from backup: {e}")

    def _get_migrated_tags(self, tags: list[str] | None) -> list[str]:
        """Return a new tag list with ``MIGRATED_TAG`` first, followed by the given tags."""
        out = [self.MIGRATED_TAG]
        if tags:
            out.extend(tags)
        return out

    def _get_original_tags(self, tags: list[str] | None) -> list[str] | None:
        """Return the given tags without ``MIGRATED_TAG``; ``None`` stays ``None``."""
        if tags is None:
            return None
        return [tag for tag in tags if tag != self.MIGRATED_TAG]

    @staticmethod
    def get_queries_from_dashboard(dashboard: Dashboard) -> Iterator[Query]:
        """Yield the query behind each widget that has a visualization with a query.

        This is a generator: it can be iterated only once, so callers needing several
        passes must materialise it (for example with ``list``).
        """
        if dashboard.widgets is None:
            return
        for widget in dashboard.widgets:
            if widget is None:
                continue
            if widget.visualization is None:
                continue
            if widget.visualization.query is None:
                continue
            yield widget.visualization.query
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from databricks.labs.ucx.source_code.redash import Redash
2+
from databricks.sdk.service.sql import Query, Dashboard
3+
4+
5+
def test_fix_dashboard(ws, installation_ctx, make_dashboard, make_query):
    """Migrate a dashboard, check its queries are tagged, then revert and check the tag is gone."""
    dashboard: Dashboard = make_dashboard()
    another_query: Query = make_query()
    installation_ctx.workspace_installation.run()
    installation_ctx.redash.migrate_dashboards(dashboard.id)
    # Re-read the dashboard: the object returned by the fixture comes from `dashboards.create`,
    # which ran before the widget was attached. Materialise the generator into a list because
    # it is iterated twice below; a bare generator would be exhausted by the first loop and
    # the post-revert checks would silently never run.
    queries = list(Redash.get_queries_from_dashboard(ws.dashboards.get(dashboard.id)))
    assert queries, "expected the dashboard to expose at least one query"
    # make sure the query is marked as migrated
    for query in queries:
        content = ws.queries.get(query.id)
        assert Redash.MIGRATED_TAG in content.tags

    # make sure a different query does not get migrated
    another_query = ws.queries.get(another_query.id)
    assert len(another_query.tags) == 1
    assert Redash.MIGRATED_TAG not in another_query.tags

    # revert the dashboard, make sure the query has only a single tag
    installation_ctx.redash.revert_dashboards(dashboard.id)
    for query in queries:
        content = ws.queries.get(query.id)
        assert len(content.tags) == 1
        assert Redash.MIGRATED_TAG not in content.tags

0 commit comments

Comments
 (0)