Skip to content

Commit bef3afd

Browse files
authored
Let migrate-tables command run as collection (#2654)
## Changes Let the `migrate-tables` command run as a collection ### Linked issues Resolves #2610 ### Functionality - [x] modified existing command: `databricks labs ucx migrate-tables` ### Tests - [x] manually tested - [x] added unit tests - [ ] ~added integration tests~ : To be covered after #2507
1 parent fad0c3c commit bef3afd

File tree

3 files changed

+88
-43
lines changed

3 files changed

+88
-43
lines changed

labs.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,11 @@ commands:
248248

249249
- name: migrate-tables
250250
description: |
251-
Trigger the migrate-tables workflow and, optionally, migrate-external-hiveserde-tables-in-place-experimental
252-
workflow and migrate-external-tables-ctas workflow.
251+
Trigger the `migrate-tables` workflow and, optionally, `migrate-external-hiveserde-tables-in-place-experimental`
252+
workflow and `migrate-external-tables-ctas` workflow.
253+
flags:
254+
- name: run-as-collection
255+
description: Run the command for the collection of workspaces with ucx installed. Default is False.
253256

254257
- name: migrate-acls
255258
description: |

src/databricks/labs/ucx/cli.py

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -519,34 +519,46 @@ def assign_metastore(
519519

520520

521521
@ucx.command
522-
def migrate_tables(w: WorkspaceClient, prompts: Prompts, *, ctx: WorkspaceContext | None = None):
522+
def migrate_tables(
523+
w: WorkspaceClient,
524+
prompts: Prompts,
525+
*,
526+
ctx: WorkspaceContext | None = None,
527+
run_as_collection: bool = False,
528+
a: AccountClient | None = None,
529+
) -> None:
523530
"""
524531
Trigger the migrate-tables workflow and, optionally, the migrate-external-hiveserde-tables-in-place-experimental
525532
workflow and migrate-external-tables-ctas.
526533
"""
527-
if ctx is None:
528-
ctx = WorkspaceContext(w)
529-
deployed_workflows = ctx.deployed_workflows
530-
deployed_workflows.run_workflow("migrate-tables")
531-
532-
tables = ctx.tables_crawler.snapshot()
533-
hiveserde_tables = [table for table in tables if table.what == What.EXTERNAL_HIVESERDE]
534-
if len(hiveserde_tables) > 0:
535-
percentage_hiveserde_tables = len(hiveserde_tables) / len(tables) * 100
536-
if prompts.confirm(
537-
f"Found {len(hiveserde_tables)} ({percentage_hiveserde_tables:.2f}%) hiveserde tables, do you want to run "
538-
f"the migrate-external-hiveserde-tables-in-place-experimental workflow?"
539-
):
540-
deployed_workflows.run_workflow("migrate-external-hiveserde-tables-in-place-experimental")
541-
542-
external_ctas_tables = [table for table in tables if table.what == What.EXTERNAL_NO_SYNC]
543-
if len(external_ctas_tables) > 0:
544-
percentage_external_ctas_tables = len(external_ctas_tables) / len(tables) * 100
545-
if prompts.confirm(
546-
f"Found {len(external_ctas_tables)} ({percentage_external_ctas_tables:.2f}%) external tables which cannot be migrated using sync"
547-
f", do you want to run the migrate-external-tables-ctas workflow?"
548-
):
549-
deployed_workflows.run_workflow("migrate-external-tables-ctas")
534+
if ctx:
535+
workspace_contexts = [ctx]
536+
else:
537+
workspace_contexts = _get_workspace_contexts(w, a, run_as_collection)
538+
for workspace_context in workspace_contexts:
539+
deployed_workflows = workspace_context.deployed_workflows
540+
deployed_workflows.run_workflow("migrate-tables")
541+
542+
tables = workspace_context.tables_crawler.snapshot()
543+
hiveserde_tables = [table for table in tables if table.what == What.EXTERNAL_HIVESERDE]
544+
if len(hiveserde_tables) > 0:
545+
percentage_hiveserde_tables = len(hiveserde_tables) / len(tables) * 100
546+
if prompts.confirm(
547+
f"Found {len(hiveserde_tables)} ({percentage_hiveserde_tables:.2f}%) hiveserde tables in "
548+
f"{workspace_context.workspace_client.config.host}, do you want to run "
549+
f"the `migrate-external-hiveserde-tables-in-place-experimental` workflow?"
550+
):
551+
deployed_workflows.run_workflow("migrate-external-hiveserde-tables-in-place-experimental")
552+
553+
external_ctas_tables = [table for table in tables if table.what == What.EXTERNAL_NO_SYNC]
554+
if len(external_ctas_tables) > 0:
555+
percentage_external_ctas_tables = len(external_ctas_tables) / len(tables) * 100
556+
if prompts.confirm(
557+
f"Found {len(external_ctas_tables)} ({percentage_external_ctas_tables:.2f}%) external tables which "
558+
f"cannot be migrated using sync in {workspace_context.workspace_client.config.host}, do you want to "
559+
"run the `migrate-external-tables-ctas` workflow?"
560+
):
561+
deployed_workflows.run_workflow("migrate-external-tables-ctas")
550562

551563

552564
@ucx.command

tests/unit/test_cli.py

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -718,30 +718,52 @@ def test_assign_metastore(acc_client, caplog):
718718
assign_metastore(acc_client, "123")
719719

720720

721-
def test_migrate_tables(ws):
722-
ws.jobs.wait_get_run_job_terminated_or_skipped.return_value = Run(
723-
state=RunState(result_state=RunResultState.SUCCESS), start_time=0, end_time=1000, run_duration=1000
721+
@pytest.mark.parametrize("run_as_collection", [False, True])
722+
def test_migrate_tables_calls_migrate_table_job_run_now(
723+
run_as_collection,
724+
workspace_clients,
725+
acc_client,
726+
) -> None:
727+
if not run_as_collection:
728+
workspace_clients = [workspace_clients[0]]
729+
run = Run(
730+
state=RunState(result_state=RunResultState.SUCCESS),
731+
start_time=0,
732+
end_time=1000,
733+
run_duration=1000,
724734
)
725-
prompts = MockPrompts({})
726-
migrate_tables(ws, prompts)
727-
ws.jobs.run_now.assert_called_with(456)
728-
ws.jobs.wait_get_run_job_terminated_or_skipped.assert_called_once()
735+
for workspace_client in workspace_clients:
736+
workspace_client.jobs.wait_get_run_job_terminated_or_skipped.return_value = run
737+
738+
migrate_tables(workspace_clients[0], MockPrompts({}), run_as_collection=run_as_collection, a=acc_client)
739+
740+
for workspace_client in workspace_clients:
741+
workspace_client.jobs.run_now.assert_called_with(456)
742+
workspace_client.jobs.wait_get_run_job_terminated_or_skipped.assert_called_once()
729743

730744

731-
def test_migrate_external_hiveserde_tables_in_place(ws):
745+
def test_migrate_tables_calls_external_hiveserde_tables_job_run_now(ws) -> None:
746+
# TODO: Test for running on a collection when context injection for multiple workspaces is supported.
732747
tables_crawler = create_autospec(TablesCrawler)
733748
table = Table(
734-
catalog="hive_metastore", database="test", name="hiveserde", object_type="UNKNOWN", table_format="HIVE"
749+
catalog="hive_metastore",
750+
database="test",
751+
name="hiveserde",
752+
object_type="UNKNOWN",
753+
table_format="HIVE",
735754
)
736755
tables_crawler.snapshot.return_value = [table]
737756
ctx = WorkspaceContext(ws).replace(tables_crawler=tables_crawler)
738757
ws.jobs.wait_get_run_job_terminated_or_skipped.return_value = Run(
739-
state=RunState(result_state=RunResultState.SUCCESS), start_time=0, end_time=1000, run_duration=1000
758+
state=RunState(result_state=RunResultState.SUCCESS),
759+
start_time=0,
760+
end_time=1000,
761+
run_duration=1000,
740762
)
741763

742764
prompt = (
743-
"Found 1 (.*) hiveserde tables, do you want to run the "
744-
"migrate-external-hiveserde-tables-in-place-experimental workflow?"
765+
"Found 1 (.*) hiveserde tables in https://localhost, do you want to run the "
766+
"`migrate-external-hiveserde-tables-in-place-experimental` workflow?"
745767
)
746768
prompts = MockPrompts({prompt: "Yes"})
747769

@@ -751,20 +773,28 @@ def test_migrate_external_hiveserde_tables_in_place(ws):
751773
ws.jobs.wait_get_run_job_terminated_or_skipped.call_count = 2
752774

753775

754-
def test_migrate_external_tables_ctas(ws):
776+
def test_migrate_tables_calls_external_tables_ctas_job_run_now(ws) -> None:
777+
# TODO: Test for running on a collection when context injection for multiple workspaces is supported.
755778
tables_crawler = create_autospec(TablesCrawler)
756779
table = Table(
757-
catalog="hive_metastore", database="test", name="externalctas", object_type="UNKNOWN", table_format="EXTERNAL"
780+
catalog="hive_metastore",
781+
database="test",
782+
name="externalctas",
783+
object_type="UNKNOWN",
784+
table_format="EXTERNAL",
758785
)
759786
tables_crawler.snapshot.return_value = [table]
760787
ctx = WorkspaceContext(ws).replace(tables_crawler=tables_crawler)
761788
ws.jobs.wait_get_run_job_terminated_or_skipped.return_value = Run(
762-
state=RunState(result_state=RunResultState.SUCCESS), start_time=0, end_time=1000, run_duration=1000
789+
state=RunState(result_state=RunResultState.SUCCESS),
790+
start_time=0,
791+
end_time=1000,
792+
run_duration=1000,
763793
)
764794

765795
prompt = (
766-
"Found 1 (.*) external tables which cannot be migrated using sync, do you want to run the "
767-
"migrate-external-tables-ctas workflow?"
796+
"Found 1 (.*) external tables which cannot be migrated using sync in https://localhost, do you want to run the "
797+
"`migrate-external-tables-ctas` workflow?"
768798
)
769799

770800
prompts = MockPrompts({prompt: "Yes"})

0 commit comments

Comments
 (0)