Skip to content

Commit 473f137

Browse files
authored
Added databricks labs ucx revert-migrated-table command (#729)
1 parent 6c89e67 commit 473f137

File tree

5 files changed

+401
-2
lines changed

5 files changed

+401
-2
lines changed

labs.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,13 @@ commands:
4949

5050
- name: validate-external-locations
5151
description: validates and provides mapping to external table to external location and shared generation tf scripts
52+
53+
- name: revert-migrated-tables
54+
description: remove notation on a migrated table for re-migration
55+
flags:
56+
- name: schema
57+
description: Schema to revert (if left blank all schemas in the workspace will be reverted)
58+
- name: table
59+
description: Table to revert (if left blank all tables in the schema will be reverted). Requires schema parameter to be specified.
60+
- name: delete_managed
61+
description: Revert and delete managed tables

src/databricks/labs/ucx/cli.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from databricks.labs.ucx.framework.tui import Prompts
1212
from databricks.labs.ucx.hive_metastore import ExternalLocations, TablesCrawler
1313
from databricks.labs.ucx.hive_metastore.mapping import TableMapping
14+
from databricks.labs.ucx.hive_metastore.tables import TablesMigrate
1415
from databricks.labs.ucx.install import WorkspaceInstaller
1516
from databricks.labs.ucx.installer import InstallationManager
1617

@@ -113,6 +114,31 @@ def ensure_assessment_run():
113114
workspace_installer.validate_and_run("assessment")
114115

115116

117+
def revert_migrated_tables(schema: str, table: str, *, delete_managed: bool = False):
    """Revert migrated tables so that they can be migrated again.

    Looks up the current user's UCX installation, prints a report of the
    migrated objects and, once the user confirms, asks ``TablesMigrate`` to
    revert the tables selected by ``schema`` / ``table``.

    Args:
        schema: Schema to revert; when empty, all schemas are in scope.
        table: Table to revert; when empty, all tables of ``schema`` are in
            scope. Must not be given without ``schema``.
        delete_managed: Forwarded to ``TablesMigrate`` for both the report and
            the revert itself.

    Returns:
        Always ``None``; problems are reported through the logger.
    """
    ws = WorkspaceClient()
    prompts = Prompts()
    installation_manager = InstallationManager(ws)
    installation = installation_manager.for_user(ws.current_user.me())
    # Check the installation before asking anything: there is no point in
    # making the user confirm an operation that cannot be carried out.
    if not installation:
        logger.error(CANT_FIND_UCX_MSG)
        return None
    # A table name is only meaningful inside a schema; reject the combination
    # up front instead of prompting for and starting a revert.
    if table and not schema:
        logger.error("Cannot accept 'table' parameter without 'schema' parameter")
        return None
    if not schema and not table:
        if not prompts.confirm(
            "You haven't specified a schema or a table. All migrated tables will be reverted."
            " Would you like to continue?",
            max_attempts=2,
        ):
            return None
    warehouse_id = installation.config.warehouse_id
    sql_backend = StatementExecutionBackend(ws, warehouse_id)
    table_crawler = TablesCrawler(sql_backend, installation.config.inventory_database)
    tm = TablesMigrate(table_crawler, ws, sql_backend)
    # The report is requested without a schema/table scope, so it summarises
    # every migrated table found; only the revert call below is scoped.
    if tm.print_revert_report(delete_managed=delete_managed) and prompts.confirm(
        "Would you like to continue?", max_attempts=2
    ):
        tm.revert_migrated_tables(schema, table, delete_managed=delete_managed)
    return None
140+
141+
116142
MAPPING = {
117143
"open-remote-config": open_remote_config,
118144
"installations": list_installations,
@@ -123,6 +149,7 @@ def ensure_assessment_run():
123149
"validate-external-locations": validate_external_locations,
124150
"ensure-assessment-run": ensure_assessment_run,
125151
"skip": skip,
152+
"revert-migrated-tables": revert_migrated_tables,
126153
}
127154

128155

src/databricks/labs/ucx/hive_metastore/tables.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
import re
3+
from collections import defaultdict
34
from collections.abc import Iterable, Iterator
45
from dataclasses import dataclass
56
from functools import partial
@@ -73,6 +74,12 @@ def sql_alter_from(self, catalog):
7374
f" TBLPROPERTIES ('upgraded_from' = '{self.key}');"
7475
)
7576

77+
def sql_unset_upgraded_to(self, catalog):
    """Build the ALTER statement that drops the ``upgraded_to`` table property.

    ``catalog`` qualifies this object's database and name in the statement.
    """
    qualified_name = f"`{catalog}`.`{self.database}`.`{self.name}`"
    return f"ALTER {self.kind} {qualified_name} UNSET TBLPROPERTIES IF EXISTS('upgraded_to');"
82+
7683

7784
@dataclass
7885
class TableError:
@@ -82,6 +89,14 @@ class TableError:
8289
error: str | None = None
8390

8491

92+
@dataclass
class MigrationCount:
    """Number of migrated objects found in one database, split by kind."""

    # Name of the database the counters below refer to.
    database: str
    # Counters default to zero so a tally can be started from the name alone.
    managed_tables: int = 0
    external_tables: int = 0
    views: int = 0
98+
99+
85100
class TablesCrawler(CrawlerBase):
86101
def __init__(self, backend: SqlBackend, schema):
87102
"""
@@ -252,3 +267,103 @@ def _init_seen_tables(self):
252267

253268
def _table_already_upgraded(self, target) -> bool:
    """Tell whether ``target`` is one of the keys recorded in ``_seen_tables``."""
    already_seen = target in self._seen_tables
    return already_seen
270+
271+
def _get_tables_to_revert(self, schema: str | None = None, table: str | None = None) -> list[Table]:
    """Select the crawled tables that were migrated and match the given scope.

    Args:
        schema: Only tables of this database are considered; compared in
            lower case. ``None`` or empty means every database.
        table: Only the table with this name is considered; compared in
            lower case. Requires ``schema``.

    Returns:
        The tables from the crawler snapshot whose ``key`` appears among the
        values of ``_seen_tables``, in snapshot order. An empty list when
        ``table`` is given without ``schema``.
    """
    schema = schema.lower() if schema else None
    table = table.lower() if table else None
    if table and not schema:
        # Without a schema the table name would match in every database, so
        # refuse the request instead of selecting more than was asked for.
        logger.error("Cannot accept 'Table' parameter without 'Schema' parameter")
        return []
    if not self._seen_tables:
        self._init_seen_tables()

    # Built once: the values of _seen_tables are the keys of migrated sources.
    migrated_sources = set(self._seen_tables.values())
    return [
        cur_table
        for cur_table in self._tc.snapshot()
        if (not schema or cur_table.database == schema)
        and (not table or cur_table.name == table)
        and cur_table.key in migrated_sources
    ]
288+
289+
def revert_migrated_tables(
    self, schema: str | None = None, table: str | None = None, *, delete_managed: bool = False
):
    """Revert the migrated tables selected by ``schema`` and ``table``.

    Views and EXTERNAL tables are always reverted. Any other table is only
    reverted when ``delete_managed`` is true; otherwise it is logged as
    skipped. The individual reverts are handed to ``Threads.strict``.
    """
    candidates = self._get_tables_to_revert(schema=schema, table=table)
    # _seen_tables is keyed by target; flip it so the source key finds its target.
    target_by_source = {source: target for target, source in self._seen_tables.items()}
    tasks = []
    for candidate in candidates:
        keeps_target = candidate.kind != "VIEW" and candidate.object_type != "EXTERNAL"
        if keeps_target and not delete_managed:
            logger.info(
                f"Skipping {candidate.object_type} Table {candidate.database}.{candidate.name} "
                f"upgraded_to {candidate.upgraded_to}"
            )
            continue
        tasks.append(partial(self._revert_migrated_table, candidate, target_by_source[candidate.key]))
    Threads.strict("revert migrated tables", tasks)
305+
306+
def _revert_migrated_table(self, table: Table, target_table_key: str):
    """Revert one migrated table.

    Removes the ``upgraded_to`` property from ``table`` in ``hive_metastore``
    and then drops the object named by ``target_table_key`` if it exists.
    """
    source_name = f"{table.database}.{table.name}"
    logger.info(f"Reverting {table.object_type} table {source_name} upgraded_to {table.upgraded_to}")
    unset_statement = table.sql_unset_upgraded_to("hive_metastore")
    self._backend.execute(unset_statement)
    drop_statement = f"DROP {table.kind} IF EXISTS {target_table_key}"
    self._backend.execute(drop_statement)
312+
313+
def _get_revert_count(self, schema: str | None = None, table: str | None = None) -> list[MigrationCount]:
    """Count the tables selected for revert, one ``MigrationCount`` per database.

    Databases appear in the order they are first encountered. Only tables
    whose ``upgraded_to`` is set are counted: views first, then EXTERNAL and
    MANAGED tables by ``object_type``.
    """
    per_database = {}
    for migrated in self._get_tables_to_revert(schema=schema, table=table):
        if migrated.database not in per_database:
            per_database[migrated.database] = MigrationCount(database=migrated.database)
        tally = per_database[migrated.database]
        if migrated.upgraded_to is None:
            # The database still gets an entry, but this table is not counted.
            continue
        if migrated.kind == "VIEW":
            tally.views += 1
        elif migrated.object_type == "EXTERNAL":
            tally.external_tables += 1
        elif migrated.object_type == "MANAGED":
            tally.managed_tables += 1
    return list(per_database.values())
342+
343+
def is_upgraded(self, schema: str, table: str) -> bool:
    """Report whether ``schema``.``table`` carries an ``upgraded_to`` property.

    Runs ``SHOW TBLPROPERTIES`` through the backend and logs the outcome.
    """
    properties = self._backend.fetch(f"SHOW TBLPROPERTIES `{schema}`.`{table}`")
    if any(row["key"] == "upgraded_to" for row in properties):
        logger.info(f"{schema}.{table} is set as upgraded")
        return True
    logger.info(f"{schema}.{table} is set as not upgraded")
    return False
351+
352+
def print_revert_report(self, *, delete_managed: bool) -> bool | None:
353+
migrated_count = self._get_revert_count()
354+
if not migrated_count:
355+
logger.info("No migrated tables were found.")
356+
return False
357+
print("The following is the count of migrated tables and views found in scope:")
358+
print("Database | External Tables | Managed Table | Views |")
359+
print("=" * 88)
360+
for count in migrated_count:
361+
print(f"{count.database:<30}| {count.external_tables:16} | {count.managed_tables:16} | {count.views:16} |")
362+
print("=" * 88)
363+
print("Migrated External Tables and Views (targets) will be deleted")
364+
if delete_managed:
365+
print("Migrated Manged Tables (targets) will be deleted")
366+
else:
367+
print("Migrated Manged Tables (targets) will be left intact.")
368+
print("To revert and delete Migrated Tables, add --delete_managed true flag to the command.")
369+
return True

tests/integration/hive_metastore/test_migrate.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,39 @@ def test_migrate_external_table(ws, sql_backend, inventory_schema, make_catalog,
101101

102102
target_tables = list(sql_backend.fetch(f"SHOW TABLES IN {dst_schema.full_name}"))
103103
assert len(target_tables) == 1
104+
105+
106+
@retried(on=[NotFound], timeout=timedelta(minutes=5))
def test_revert_migrated_table(ws, sql_backend, inventory_schema, make_schema, make_table, make_catalog):
    """Reverting one source schema must leave tables of another schema alone."""
    # Two source schemas in hive_metastore: two tables in the first, one in the second.
    src_schema1 = make_schema(catalog_name="hive_metastore")
    src_schema2 = make_schema(catalog_name="hive_metastore")
    table_to_revert = make_table(schema_name=src_schema1.name)
    table_not_migrated = make_table(schema_name=src_schema1.name)
    table_to_not_revert = make_table(schema_name=src_schema2.name)
    crawled_tables = [table_to_revert, table_not_migrated, table_to_not_revert]

    # Destination catalog with schemas named after the source schemas.
    dst_catalog = make_catalog()
    dst_schema1 = make_schema(catalog_name=dst_catalog.name, name=src_schema1.name)
    dst_schema2 = make_schema(catalog_name=dst_catalog.name, name=src_schema2.name)

    crawler = StaticTablesCrawler(sql_backend, inventory_schema, crawled_tables)
    tm = TablesMigrate(crawler, ws, sql_backend, dst_catalog.name)
    tm.migrate_tables()

    # Revert only the first source schema, dropping managed targets as well.
    tm.revert_migrated_tables(src_schema1.name, delete_managed=True)

    # Neither table of the first schema may report "upgraded_to" after the revert.
    assert not tm.is_upgraded(table_to_revert.schema_name, table_to_revert.name)
    assert not tm.is_upgraded(table_not_migrated.schema_name, table_not_migrated.name)
    # The table of the second schema was out of scope and must still be marked as upgraded.
    assert tm.is_upgraded(table_to_not_revert.schema_name, table_to_not_revert.name)

    # The first destination schema is expected to be empty after the revert.
    remaining_in_schema1 = list(sql_backend.fetch(f"SHOW TABLES IN {dst_schema1.full_name}"))
    assert len(remaining_in_schema1) == 0

    # The second destination schema must still hold exactly its one target table.
    remaining_in_schema2 = list(sql_backend.fetch(f"SHOW TABLES IN {dst_schema2.full_name}"))
    assert len(remaining_in_schema2) == 1
    surviving_table = remaining_in_schema2[0]
    assert surviving_table["database"] == dst_schema2.name
    assert surviving_table["tableName"] == table_to_not_revert.name

0 commit comments

Comments
 (0)