Skip to content

Commit d0275ba

Browse files
authored
changing table_migration to user_isolation (#3389)
<!-- REMOVE IRRELEVANT COMMENTS BEFORE CREATING A PULL REQUEST --> ## Changes Renamed the `table_migration` job cluster to `user_isolation`. It's just a naming change, updated in all places where there is a reference. <!-- Summary of your changes that are easy to understand. Add screenshots when necessary --> Resolves #3172
1 parent 74245ae commit d0275ba

File tree

7 files changed

+34
-35
lines changed

7 files changed

+34
-35
lines changed

src/databricks/labs/ucx/hive_metastore/workflows.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def convert_managed_table(self, ctx: RuntimeContext):
1616
managed_table_external_storage=ctx.config.managed_table_external_storage
1717
)
1818

19-
@job_task(job_cluster="table_migration", depends_on=[Assessment.crawl_tables, convert_managed_table])
19+
@job_task(job_cluster="user_isolation", depends_on=[Assessment.crawl_tables, convert_managed_table])
2020
def migrate_external_tables_sync(self, ctx: RuntimeContext):
2121
"""This workflow task migrates the external tables that are supported by SYNC command from the Hive Metastore
2222
to the Unity Catalog.
@@ -25,14 +25,14 @@ def migrate_external_tables_sync(self, ctx: RuntimeContext):
2525
what=What.EXTERNAL_SYNC, managed_table_external_storage=ctx.config.managed_table_external_storage
2626
)
2727

28-
@job_task(job_cluster="table_migration", depends_on=[Assessment.crawl_tables, convert_managed_table])
28+
@job_task(job_cluster="user_isolation", depends_on=[Assessment.crawl_tables, convert_managed_table])
2929
def migrate_dbfs_root_delta_tables(self, ctx: RuntimeContext):
3030
"""This workflow task migrates delta tables stored in DBFS root from the Hive Metastore to the Unity Catalog
3131
using deep clone.
3232
"""
3333
ctx.tables_migrator.migrate_tables(what=What.DBFS_ROOT_DELTA)
3434

35-
@job_task(job_cluster="table_migration", depends_on=[Assessment.crawl_tables, convert_managed_table])
35+
@job_task(job_cluster="user_isolation", depends_on=[Assessment.crawl_tables, convert_managed_table])
3636
def migrate_dbfs_root_non_delta_tables(
3737
self,
3838
ctx: RuntimeContext,
@@ -43,7 +43,7 @@ def migrate_dbfs_root_non_delta_tables(
4343
ctx.tables_migrator.migrate_tables(what=What.DBFS_ROOT_NON_DELTA)
4444

4545
@job_task(
46-
job_cluster="table_migration",
46+
job_cluster="user_isolation",
4747
depends_on=[
4848
Assessment.crawl_tables,
4949
migrate_external_tables_sync,
@@ -57,7 +57,7 @@ def migrate_views(self, ctx: RuntimeContext):
5757
"""
5858
ctx.tables_migrator.migrate_tables(what=What.VIEW)
5959

60-
@job_task(job_cluster="table_migration", depends_on=[migrate_views])
60+
@job_task(job_cluster="user_isolation", depends_on=[migrate_views])
6161
def update_migration_status(self, ctx: RuntimeContext):
6262
"""Refresh the migration status to present it in the dashboard."""
6363
ctx.tables_migrator.get_remaining_tables()
@@ -67,7 +67,7 @@ class MigrateHiveSerdeTablesInPlace(Workflow):
6767
def __init__(self):
6868
super().__init__('migrate-external-hiveserde-tables-in-place-experimental')
6969

70-
@job_task(job_cluster="table_migration", depends_on=[Assessment.crawl_tables])
70+
@job_task(job_cluster="user_isolation", depends_on=[Assessment.crawl_tables])
7171
def migrate_hive_serde_in_place(self, ctx: RuntimeContext):
7272
"""This workflow task migrates ParquetHiveSerDe, OrcSerde, AvroSerDe tables in place from
7373
the Hive Metastore to the Unity Catalog."""
@@ -77,7 +77,7 @@ def migrate_hive_serde_in_place(self, ctx: RuntimeContext):
7777
)
7878

7979
@job_task(
80-
job_cluster="table_migration",
80+
job_cluster="user_isolation",
8181
depends_on=[Assessment.crawl_tables, migrate_hive_serde_in_place],
8282
)
8383
def migrate_views(self, ctx: RuntimeContext):
@@ -86,7 +86,7 @@ def migrate_views(self, ctx: RuntimeContext):
8686
"""
8787
ctx.tables_migrator.migrate_tables(what=What.VIEW)
8888

89-
@job_task(job_cluster="table_migration", depends_on=[migrate_views])
89+
@job_task(job_cluster="user_isolation", depends_on=[migrate_views])
9090
def update_migration_status(self, ctx: RuntimeContext):
9191
"""Refresh the migration status to present it in the dashboard."""
9292
ctx.tables_migrator.get_remaining_tables()
@@ -96,22 +96,22 @@ class MigrateExternalTablesCTAS(Workflow):
9696
def __init__(self):
9797
super().__init__('migrate-external-tables-ctas')
9898

99-
@job_task(job_cluster="table_migration", depends_on=[Assessment.crawl_tables])
99+
@job_task(job_cluster="user_isolation", depends_on=[Assessment.crawl_tables])
100100
def migrate_other_external_ctas(self, ctx: RuntimeContext):
101101
"""This workflow task migrates non-SYNC supported and non HiveSerde external tables using CTAS"""
102102
ctx.tables_migrator.migrate_tables(
103103
what=What.EXTERNAL_NO_SYNC,
104104
)
105105

106-
@job_task(job_cluster="table_migration", depends_on=[Assessment.crawl_tables])
106+
@job_task(job_cluster="user_isolation", depends_on=[Assessment.crawl_tables])
107107
def migrate_hive_serde_ctas(self, ctx: RuntimeContext):
108108
"""This workflow task migrates HiveSerde tables using CTAS"""
109109
ctx.tables_migrator.migrate_tables(
110110
what=What.EXTERNAL_HIVESERDE,
111111
)
112112

113113
@job_task(
114-
job_cluster="table_migration",
114+
job_cluster="user_isolation",
115115
depends_on=[Assessment.crawl_tables, migrate_other_external_ctas, migrate_hive_serde_ctas],
116116
)
117117
def migrate_views(self, ctx: RuntimeContext):
@@ -120,7 +120,7 @@ def migrate_views(self, ctx: RuntimeContext):
120120
"""
121121
ctx.tables_migrator.migrate_tables(what=What.VIEW)
122122

123-
@job_task(job_cluster="table_migration", depends_on=[migrate_views])
123+
@job_task(job_cluster="user_isolation", depends_on=[migrate_views])
124124
def update_migration_status(self, ctx: RuntimeContext):
125125
"""Refresh the migration status to present it in the dashboard."""
126126
ctx.tables_migrator.get_remaining_tables()
@@ -137,7 +137,7 @@ def scan_tables_in_mounts_experimental(self, ctx: RuntimeContext):
137137
replacing any existing content that might be present."""
138138
ctx.tables_in_mounts.snapshot(force_refresh=True)
139139

140-
@job_task(job_cluster="table_migration", depends_on=[scan_tables_in_mounts_experimental])
140+
@job_task(job_cluster="user_isolation", depends_on=[scan_tables_in_mounts_experimental])
141141
def update_migration_status(self, ctx: RuntimeContext):
142142
"""Refresh the migration status to present it in the dashboard."""
143143
ctx.tables_migrator.get_remaining_tables()
@@ -147,12 +147,12 @@ class MigrateTablesInMounts(Workflow):
147147
def __init__(self):
148148
super().__init__('migrate-tables-in-mounts-experimental')
149149

150-
@job_task(job_cluster="table_migration", depends_on=[ScanTablesInMounts.scan_tables_in_mounts_experimental])
150+
@job_task(job_cluster="user_isolation", depends_on=[ScanTablesInMounts.scan_tables_in_mounts_experimental])
151151
def migrate_tables_in_mounts_experimental(self, ctx: RuntimeContext):
152152
"""[EXPERIMENTAL] This workflow migrates `delta tables stored in mount points` to Unity Catalog using a Create Table statement."""
153153
ctx.tables_migrator.migrate_tables(what=What.TABLE_IN_MOUNT)
154154

155-
@job_task(job_cluster="table_migration", depends_on=[migrate_tables_in_mounts_experimental])
155+
@job_task(job_cluster="user_isolation", depends_on=[migrate_tables_in_mounts_experimental])
156156
def update_migration_status(self, ctx: RuntimeContext):
157157
"""Refresh the migration status to present it in the dashboard."""
158158
ctx.tables_migrator.get_remaining_tables()

src/databricks/labs/ucx/installer/workflows.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ def _job_cluster_spark_conf(self, cluster_key: str):
741741
return spark_conf | conf_from_installation
742742
if cluster_key == "tacl":
743743
return {"spark.databricks.acl.sqlOnly": "true"} | conf_from_installation
744-
if cluster_key == "table_migration":
744+
if cluster_key == "user_isolation":
745745
return {"spark.sql.sources.parallelPartitionDiscovery.parallelism": "200"} | conf_from_installation
746746
return conf_from_installation
747747

@@ -918,14 +918,13 @@ def _job_clusters(self, names: set[str]):
918918
),
919919
)
920920
)
921-
if "table_migration" in names:
922-
# TODO: rename to "user-isolation", so that we can use it in group migration workflows
921+
if "user_isolation" in names:
923922
clusters.append(
924923
jobs.JobCluster(
925-
job_cluster_key="table_migration",
924+
job_cluster_key="user_isolation",
926925
new_cluster=compute.ClusterSpec(
927926
data_security_mode=compute.DataSecurityMode.USER_ISOLATION,
928-
spark_conf=self._job_cluster_spark_conf("table_migration"),
927+
spark_conf=self._job_cluster_spark_conf("user_isolation"),
929928
policy_id=self._config.policy_id,
930929
autoscale=compute.AutoScale(
931930
max_workers=self._config.max_workers,

src/databricks/labs/ucx/progress/workflows.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class MigrationProgress(Workflow):
2323
def __init__(self) -> None:
2424
super().__init__('migration-progress-experimental')
2525

26-
@job_task(job_cluster="table_migration")
26+
@job_task(job_cluster="user_isolation")
2727
def verify_prerequisites(self, ctx: RuntimeContext) -> None:
2828
"""Verify the prerequisites for running this job on the table migration cluster are fulfilled.
2929
@@ -42,14 +42,14 @@ def crawl_tables(self, ctx: RuntimeContext) -> None:
4242
# Step 1 of 3: Just refresh the inventory.
4343
ctx.tables_crawler.snapshot(force_refresh=True)
4444

45-
@job_task(depends_on=[verify_prerequisites, crawl_tables], job_cluster="table_migration")
45+
@job_task(depends_on=[verify_prerequisites, crawl_tables], job_cluster="user_isolation")
4646
def refresh_table_migration_status(self, ctx: RuntimeContext) -> None:
4747
"""Scan the tables (and views) in the inventory and record whether each has been migrated or not."""
4848
# Step 2 of 3: Refresh the migration status of all the tables (updated in the previous step on the main cluster.)
4949
ctx.migration_status_refresher.snapshot(force_refresh=True)
5050

5151
@job_task(
52-
depends_on=[verify_prerequisites, crawl_tables, refresh_table_migration_status], job_cluster="table_migration"
52+
depends_on=[verify_prerequisites, crawl_tables, refresh_table_migration_status], job_cluster="user_isolation"
5353
)
5454
def update_tables_history_log(self, ctx: RuntimeContext) -> None:
5555
"""Update the history log with the latest tables inventory snapshot."""
@@ -60,7 +60,7 @@ def update_tables_history_log(self, ctx: RuntimeContext) -> None:
6060
tables_snapshot = ctx.tables_crawler.snapshot()
6161
history_log.append_inventory_snapshot(tables_snapshot)
6262

63-
@job_task(depends_on=[verify_prerequisites], job_cluster="table_migration")
63+
@job_task(depends_on=[verify_prerequisites], job_cluster="user_isolation")
6464
def crawl_udfs(self, ctx: RuntimeContext) -> None:
6565
"""Iterates over all UDFs in the Hive Metastore of the current workspace and persists their metadata in the
6666
table named `$inventory_database.udfs`. This inventory is currently used when scanning securable objects for
@@ -69,7 +69,7 @@ def crawl_udfs(self, ctx: RuntimeContext) -> None:
6969
udfs_snapshot = ctx.udfs_crawler.snapshot(force_refresh=True)
7070
history_log.append_inventory_snapshot(udfs_snapshot)
7171

72-
@job_task(depends_on=[verify_prerequisites, crawl_tables, crawl_udfs], job_cluster="table_migration")
72+
@job_task(depends_on=[verify_prerequisites, crawl_tables, crawl_udfs], job_cluster="user_isolation")
7373
def crawl_grants(self, ctx: RuntimeContext) -> None:
7474
"""Scans all securable objects for permissions that have been assigned: this include database-level permissions,
7575
as well permissions directly configured on objects in the (already gathered) table and UDF inventories. The
@@ -82,7 +82,7 @@ def crawl_grants(self, ctx: RuntimeContext) -> None:
8282
grants_snapshot = ctx.grants_crawler.snapshot(force_refresh=True)
8383
history_log.append_inventory_snapshot(grants_snapshot)
8484

85-
@job_task(depends_on=[verify_prerequisites], job_cluster="table_migration")
85+
@job_task(depends_on=[verify_prerequisites], job_cluster="user_isolation")
8686
def assess_jobs(self, ctx: RuntimeContext) -> None:
8787
"""Scans through all the jobs and identifies those that are not compatible with UC. The list of all the jobs is
8888
stored in the `$inventory.jobs` table.
@@ -97,7 +97,7 @@ def assess_jobs(self, ctx: RuntimeContext) -> None:
9797
jobs_snapshot = ctx.jobs_crawler.snapshot(force_refresh=True)
9898
history_log.append_inventory_snapshot(jobs_snapshot)
9999

100-
@job_task(depends_on=[verify_prerequisites], job_cluster="table_migration")
100+
@job_task(depends_on=[verify_prerequisites], job_cluster="user_isolation")
101101
def assess_clusters(self, ctx: RuntimeContext) -> None:
102102
"""Scan through all the clusters and identifies those that are not compatible with UC. The list of all the clusters
103103
is stored in the`$inventory.clusters` table.
@@ -112,7 +112,7 @@ def assess_clusters(self, ctx: RuntimeContext) -> None:
112112
clusters_snapshot = ctx.clusters_crawler.snapshot(force_refresh=True)
113113
history_log.append_inventory_snapshot(clusters_snapshot)
114114

115-
@job_task(depends_on=[verify_prerequisites], job_cluster="table_migration")
115+
@job_task(depends_on=[verify_prerequisites], job_cluster="user_isolation")
116116
def assess_pipelines(self, ctx: RuntimeContext) -> None:
117117
"""This module scans through all the Pipelines and identifies those pipelines which has Azure Service Principals
118118
embedded (who has been given access to the Azure storage accounts via spark configurations) in the pipeline
@@ -127,7 +127,7 @@ def assess_pipelines(self, ctx: RuntimeContext) -> None:
127127
pipelines_snapshot = ctx.pipelines_crawler.snapshot(force_refresh=True)
128128
history_log.append_inventory_snapshot(pipelines_snapshot)
129129

130-
@job_task(depends_on=[verify_prerequisites], job_cluster="table_migration")
130+
@job_task(depends_on=[verify_prerequisites], job_cluster="user_isolation")
131131
def crawl_cluster_policies(self, ctx: RuntimeContext) -> None:
132132
"""This module scans through all the Cluster Policies and get the necessary information
133133
@@ -166,7 +166,7 @@ def assess_workflows(self, ctx: RuntimeContext):
166166
refresh_table_migration_status,
167167
update_tables_history_log,
168168
],
169-
job_cluster="table_migration",
169+
job_cluster="user_isolation",
170170
)
171171
def record_workflow_run(self, ctx: RuntimeContext) -> None:
172172
"""Record the workflow run of this workflow."""

src/databricks/labs/ucx/recon/workflows.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ class MigrationRecon(Workflow):
66
def __init__(self):
77
super().__init__('migrate-data-reconciliation')
88

9-
@job_task(job_cluster="table_migration")
9+
@job_task(job_cluster="user_isolation")
1010
def recon_migration_result(self, ctx: RuntimeContext):
1111
"""This workflow validate post-migration datasets against their pre-migration counterparts. This includes all
1212
tables, by comparing their schema, row counts and row comparison

src/databricks/labs/ucx/workspace_access/workflows.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class LegacyGroupMigration(Workflow):
1111
def __init__(self):
1212
super().__init__('migrate-groups-legacy')
1313

14-
@job_task(job_cluster="table_migration")
14+
@job_task(job_cluster="user_isolation")
1515
def verify_metastore_attached(self, ctx: RuntimeContext):
1616
"""Verifies if a metastore is attached to this workspace. If not, the workflow will fail.
1717
@@ -72,7 +72,7 @@ class PermissionsMigrationAPI(Workflow):
7272
def __init__(self):
7373
super().__init__('migrate-groups')
7474

75-
@job_task(job_cluster="table_migration")
75+
@job_task(job_cluster="user_isolation")
7676
def verify_metastore_attached(self, ctx: RuntimeContext):
7777
"""Verifies if a metastore is attached to this workspace. If not, the workflow will fail.
7878

tests/integration/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1022,7 +1022,7 @@ def config(self) -> WorkspaceConfig:
10221022
override_clusters={
10231023
"main": default_cluster_id,
10241024
"tacl": tacl_cluster_id,
1025-
"table_migration": table_migration_cluster_id,
1025+
"user_isolation": table_migration_cluster_id,
10261026
},
10271027
workspace_start_path=self.installation.install_folder(),
10281028
renamed_group_prefix=self.renamed_group_prefix,

tests/integration/hive_metastore/test_ext_hms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def test_migration_job_ext_hms(ws, installation_ctx, prepare_tables_for_migratio
2727
wc,
2828
override_clusters={
2929
"main": ext_hms_cluster_id,
30-
"table_migration": ext_hms_cluster_id,
30+
"user_isolation": ext_hms_cluster_id,
3131
},
3232
),
3333
extend_prompts={

0 commit comments

Comments
 (0)