Commit f6bd734

Estimate migration effort based on assessment database (#1008)
## Changes
Adds functionality to broadly estimate the migration effort, in days, for each asset crawled into the assessment database.

### Linked issues
Resolves #877

### Tasks
- [x] Add a summary widget for a global estimate per object type
- [x] Add an assumption and scope for each object type
- [x] Add a new estimates dashboard

### Tests
- [x] manually tested
1 parent dbdebb8 commit f6bd734

17 files changed (+203, -7 lines)
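For a rough intuition of what "estimate migration effort in days per object type" means here, a minimal sketch follows. It is a hypothetical illustration only: the commit itself implements the estimates as SQL views and dashboard widgets (for example the `table_estimates` view deployed in install.py below), and the object types and day weights in this sketch are made-up assumptions, not the shipped values.

```python
# Hypothetical sketch, not the commit's actual logic: the real estimates are SQL
# views/widgets over the assessment inventory. The weights below are illustrative.
EFFORT_DAYS_PER_OBJECT_TYPE = {"tables": 1.0, "jobs": 0.5, "clusters": 0.25}

def estimate_migration_days(inventory_counts: dict[str, int]) -> float:
    """Broad estimate: per-object-type count times an assumed effort weight in days."""
    return sum(
        EFFORT_DAYS_PER_OBJECT_TYPE.get(object_type, 1.0) * count
        for object_type, count in inventory_counts.items()
    )

print(estimate_migration_days({"tables": 120, "jobs": 40}))  # 140.0
```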

src/databricks/labs/ucx/framework/dashboards.py

Lines changed: 33 additions & 5 deletions
@@ -26,9 +26,10 @@
 class SimpleQuery:
     dashboard_ref: str
     name: str
-    query: str
-    viz: dict[str, str]
     widget: dict[str, str]
+    viz: dict[str, str]
+    query: str | None = None
+    text: str | None = None

     @property
     def key(self):
@@ -131,6 +132,8 @@ def validate(self):
     def _validate_folder(self, dashboard_folder, step_folder):
         dashboard_ref = f"{step_folder.stem}_{dashboard_folder.stem}".lower()
         for query in self._desired_queries(dashboard_folder, dashboard_ref):
+            if query.text:
+                continue
             try:
                 self._get_viz_options(query)
                 self._get_widget_options(query)
@@ -142,9 +145,15 @@ def _install_widget(self, query: SimpleQuery, dashboard_ref: str):
         dashboard_id = self._state.dashboards[dashboard_ref]
         widget_options = self._get_widget_options(query)
         # widgets are cleaned up every dashboard redeploy
-        widget = self._ws.dashboard_widgets.create(
-            dashboard_id, widget_options, 1, visualization_id=self._state.viz[query.key]
-        )
+        if query.query:
+            widget = self._ws.dashboard_widgets.create(
+                dashboard_id, widget_options, 1, visualization_id=self._state.viz[query.key]
+            )
+        elif query.text:
+            text = query.text[query.text.index("\n") + 1 :]
+            widget = self._ws.dashboard_widgets.create(dashboard_id, widget_options, 1, text=text)
+        else:
+            raise ValueError("Query or Text should be set")
         assert widget.id is not None
         self._state.widgets[query.key] = widget.id

@@ -245,9 +254,25 @@ def _desired_queries(self, local_folder: Path, dashboard_ref: str) -> list[SimpleQuery]:
                     widget=self._parse_magic_comment(f, "-- widget ", text),
                 )
             )
+        for f in local_folder.glob("*.md"):
+            if f.name == "README.md":
+                continue
+            text = f.read_text("utf8")
+            desired_queries.append(
+                SimpleQuery(
+                    dashboard_ref=dashboard_ref,
+                    name=f.name,
+                    text=text,
+                    widget=self._parse_magic_comment(f, "-- widget ", text),
+                    viz={},
+                )
+            )
         return desired_queries

     def _install_viz(self, query: SimpleQuery):
+        if query.text:
+            logger.debug(f"Skipping viz {query.name} because it's a text widget")
+            return None
         viz_args = self._get_viz_options(query)
         if query.key in self._state.viz:
             return self._ws.query_visualizations.update(self._state.viz[query.key], **viz_args)
@@ -265,6 +290,9 @@ def _get_viz_options(self, query: SimpleQuery):
         return viz_args

     def _install_query(self, query: SimpleQuery, dashboard_name: str, data_source_id: str, parent: str):
+        if query.text:
+            logger.debug(f"Skipping query {query.name} because it's a text widget")
+            return None
         query_meta = {
             "data_source_id": data_source_id,
             "name": f"{dashboard_name} - {query.name}",

src/databricks/labs/ucx/install.py

Lines changed: 1 addition & 0 deletions
@@ -167,6 +167,7 @@ def deploy_schema(sql_backend: SqlBackend, inventory_schema: str):
     )
     deployer.deploy_view("objects", "queries/views/objects.sql")
     deployer.deploy_view("grant_detail", "queries/views/grant_detail.sql")
+    deployer.deploy_view("table_estimates", "queries/views/table_estimates.sql")


 class WorkspaceInstaller:
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+-- widget title=Metastore assignment, row=0, col=0, size_x=2, size_y=8
+
+## 1 - Metastore assignment
+
+The first step of adopting UC is attaching your current workspace to a UC metastore.
+
+This section automatically detects whether your workspace has been attached to a UC metastore, and also detects jobs that can potentially fail when attaching the workspace to the metastore.
+
+Follow the docs below to attach your workspace to the metastore:
+
+[[AWS]](https://docs.databricks.com/en/data-governance/unity-catalog/enable-workspaces.html)
+[[Azure]](https://learn.microsoft.com/en-us/azure/databricks/data-governance/unity-catalog/enable-workspaces)
+[[GCP]](https://docs.gcp.databricks.com/data-governance/unity-catalog/enable-workspaces.html)
+
+If any incompatible submit runs have been detected, please follow the steps highlighted below:
+
+1. Find the incompatible jobs in your local orchestrator based on the object_id identified by UCX.
+2. Change the job configuration to include the following in the ClusterInfo: "data_security_mode": "NONE"
+3. Alternatively:
+   1. Create a Cluster Policy for external orchestrators and set "data_security_mode": "NONE"
+   2. Assign the Cluster Policy to Service Principals
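The widget text above asks users to set "data_security_mode": "NONE" in the ClusterInfo of externally submitted runs. As a rough illustration of where that field sits in a one-time submit-run payload, here is a sketch; the Spark version, node type, and notebook path are placeholder assumptions, not values from this commit:

```python
# Placeholder submit-run payload; the only point here is the placement of
# "data_security_mode": "NONE" - all other values are assumptions.
new_cluster = {
    "spark_version": "13.3.x-scala2.12",
    "node_type_id": "i3.xlarge",
    "num_workers": 2,
    "data_security_mode": "NONE",  # avoids the "no data security mode specified" failure flagged by UCX
}

submit_run_payload = {
    "run_name": "external-orchestrator-task",
    "tasks": [
        {
            "task_key": "main",
            "new_cluster": new_cluster,
            "notebook_task": {"notebook_path": "/Repos/example/job_notebook"},
        }
    ],
}
```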
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+-- viz type=counter, name=Metastore assigned, value_column=uc_metastore_assigned
+-- widget row=0, col=5, size_x=1, size_y=8
+SELECT case when CURRENT_METASTORE() is not null then "Metastore already assigned" else "Metastore not assigned" end as uc_metastore_assigned
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+-- viz type=table, name=Incompatible submit runs detected, columns=object_type,object_id,failure
+-- widget title=Incompatible submit runs, row=0, col=2, size_x=3, size_y=8
+SELECT * FROM
+(SELECT object_type, object_id, EXPLODE(from_json(failures, 'array<string>')) AS failure
+FROM $inventory.objects) WHERE failure = "no data security mode specified" AND object_type = "submit_runs"
+
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+-- widget title=Group migration, row=1, col=0, size_x=2, size_y=8
+
+## 2 - Group migration
+
+The second step of successfully adopting UC is migrating your workspace-local groups to the account.
+This step is relatively low risk: it is an additive operation and won't disturb your currently running pipelines.
+
+Follow these steps to successfully migrate your groups to the account:
+
+If you're not using an Identity Provider (Okta, Azure Entra, etc.):
+1. Create the groups at the account level; consider using the [create-account-groups](https://github.com/databrickslabs/ucx/blob/main/README.md#create-account-groups-command) command.
+   1. For extra safety, consider running the [validate-group-membership](https://github.com/databrickslabs/ucx/blob/main/README.md#validate-groups-membership-command) command to validate that you have the same number of groups/users in the workspace and the account.
+2. Enable SCIM at the account level.
+
+If you're using an Identity Provider:
+1. Enable SCIM at the account level.
+2. Disable SCIM at the workspace level if not done already.
+3. Trigger a sync from your IdP to the account.
+   1. To validate that all groups are properly set up for the group migration, run [validate-group-membership](https://github.com/databrickslabs/ucx/blob/main/README.md#validate-groups-membership-command).
+
+Once the account groups are set up, perform the group migration using the Group migration workflow; more information in the [docs](https://github.com/databrickslabs/ucx/blob/main/README.md#group-migration-workflow).
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+-- viz type=table, name=Workspace local groups, columns=id_in_workspace,name_in_workspace,name_in_account,temporary_name,members,entitlements,external_id,roles
+-- widget title=Workspace local groups to migrate, row=1, col=2, size_x=3, size_y=8
+SELECT * FROM $inventory.groups
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+-- viz type=counter, name=Group migration complexity, counter_label=Group migration complexity, value_column=group_migration_complexity
+-- widget row=1, col=5, size_x=1, size_y=8
+select
+case when total_groups = 0 then NULL
+when total_groups between 1 and 50 then "S"
+when total_groups between 51 and 200 then "M"
+when total_groups > 200 then "L"
+ELSE NULL
+end as group_migration_complexity from
+(SELECT count(*) as total_groups FROM $inventory.groups)
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+-- widget title=Table estimates, row=2, col=0, size_x=2, size_y=8
+## 3 - UC Data modeling
+
+The third step of a successful UC migration is defining your target data model on UC.
+This step is required in order to choose in which catalogs your existing data in the Hive Metastore will land.
+
+As a starting point, consider creating a catalog that has the same name as your workspace.
+For example, a table `database.table1` would then land in the `workspace_name.database.table1` table.
+
+The complexity factor is relative to the number of databases and tables identified during the assessment.
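As an illustration of the suggested starting point above (one catalog named after the workspace), the target objects could be created roughly like this; the catalog, schema, and table names are examples, and `spark` is assumed to be an active Spark session on a UC-enabled workspace:

```python
# Illustrative only: map hive_metastore.database.table1 to workspace_name.database.table1.
spark.sql("CREATE CATALOG IF NOT EXISTS workspace_name")
spark.sql("CREATE SCHEMA IF NOT EXISTS workspace_name.database")
spark.sql(
    "CREATE TABLE IF NOT EXISTS workspace_name.database.table1 "
    "AS SELECT * FROM hive_metastore.database.table1"
)
```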
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+-- viz type=table, name=Tables to migrate, columns=catalog,database,name,object_type,table_format,location,view_text,upgraded_to,storage_properties
+-- widget title=Tables to migrate, row=2, col=2, size_x=3, size_y=8
+select * from $inventory.tables;
