Skip to content

Commit 07acc0b

Browse files
authored
Update migration progress workflow to also re-lint dashboards and jobs (#3025)
## Changes

This PR updates the `migration-progress-experimental` workflow so that it also re-runs the linting tasks for dashboard queries and for notebooks that are associated with jobs. (It was always the intention that this would happen once linting was part of the `assessment` workflow.)

### Linked issues

Relates #2678.

### Functionality

- modified existing workflow: `migration-progress-experimental`

### Tests

- added unit tests
- existing integration tests
1 parent 85edcfe commit 07acc0b

File tree

4 files changed

+50
-31
lines changed

4 files changed

+50
-31
lines changed

docs/table_persistence.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ Table Utilization:
1414
| permissions | RW | | RW | RO | | RO | | |
1515
| jobs | RW | RW | | | RO | | | |
1616
| clusters | RW | RW | | | | | | |
17-
| directfs_in_paths | RW | | | | | | | RW |
18-
| directfs_in_queries | RW | | | | | | | RW |
17+
| directfs_in_paths | RW | RW | | | | | | RW |
18+
| directfs_in_queries | RW | RW | | | | | | RW |
1919
| external_locations | RW | | | RO | | | | |
2020
| workspace | RW | | RO | | RO | | | |
2121
| workspace_objects | RW | | | | | | | |
@@ -27,8 +27,8 @@ Table Utilization:
2727
| submit_runs | RW | | | | | | | |
2828
| policies | RW | RW | | | | | | |
2929
| migration_status | | RW | | RW | | RW | | |
30-
| query_problems | RW | | | | | | | RW |
31-
| workflow_problems | RW | | | | | | | RW |
30+
| query_problems | RW | RW | | | | | | RW |
31+
| workflow_problems | RW | RW | | | | | | RW |
3232
| udfs | RW | RW | RO | | | | | |
3333
| logs | RW | | RW | RW | | RW | RW | |
3434
| recon_results | | | | | | | RW | |

src/databricks/labs/ucx/contexts/workflow_task.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from databricks.labs.ucx.assessment.pipelines import PipelinesCrawler
1313
from databricks.labs.ucx.config import WorkspaceConfig
1414
from databricks.labs.ucx.contexts.application import GlobalContext
15-
from databricks.labs.ucx.hive_metastore import TablesInMounts
15+
from databricks.labs.ucx.hive_metastore import TablesInMounts, TablesCrawler
1616
from databricks.labs.ucx.hive_metastore.table_size import TableSizeCrawler
1717
from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler
1818
from databricks.labs.ucx.installer.logs import TaskRunWarningRecorder
@@ -84,7 +84,7 @@ def global_init_scripts_crawler(self) -> GlobalInitScriptCrawler:
8484
return GlobalInitScriptCrawler(self.workspace_client, self.sql_backend, self.inventory_database)
8585

8686
@cached_property
87-
def tables_crawler(self) -> FasterTableScanCrawler:
87+
def tables_crawler(self) -> TablesCrawler:
8888
return FasterTableScanCrawler(self.sql_backend, self.inventory_database, self.config.include_databases)
8989

9090
@cached_property

src/databricks/labs/ucx/progress/workflows.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@
77
class MigrationProgress(Workflow):
88
"""Experimental workflow that rescans the environment to reflect and track progress that has been made.
99
10-
This is a subset of the assessment workflow and covers:
10+
It overlaps substantially with the assessment workflow, covering:
1111
1212
- Clusters
13+
- Dashboards
1314
- Grants
14-
- Jobs
15+
- Jobs (inventory & linting)
1516
- Pipelines
1617
- Policies
1718
- Tables
@@ -122,6 +123,18 @@ def refresh_table_migration_status(self, ctx: RuntimeContext) -> None:
122123
"""
123124
ctx.migration_status_refresher.snapshot(force_refresh=True)
124125

126+
@job_task
127+
def assess_dashboards(self, ctx: RuntimeContext):
128+
"""Scans all dashboards for migration issues in SQL code of embedded widgets.
129+
Also stores direct filesystem accesses for display in the migration dashboard."""
130+
ctx.query_linter.refresh_report(ctx.sql_backend, ctx.inventory_database)
131+
132+
@job_task
133+
def assess_workflows(self, ctx: RuntimeContext):
134+
"""Scans all jobs for migration issues in notebooks.
135+
Also stores direct filesystem accesses for display in the migration dashboard."""
136+
ctx.workflow_linter.refresh_report(ctx.sql_backend, ctx.inventory_database)
137+
125138
@job_task(
126139
depends_on=[
127140
crawl_grants,

tests/unit/progress/test_workflows.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,51 @@
1+
from typing import get_type_hints
12
from unittest.mock import create_autospec
23

34
import pytest
45
from databricks.sdk import WorkspaceClient
56
from databricks.sdk.service.catalog import CatalogInfo, MetastoreAssignment
67
from databricks.sdk.service.jobs import BaseRun, RunResultState, RunState
78

8-
from databricks.labs.ucx.assessment.clusters import ClustersCrawler, PoliciesCrawler
9-
from databricks.labs.ucx.assessment.jobs import JobsCrawler
10-
from databricks.labs.ucx.assessment.pipelines import PipelinesCrawler
119
from databricks.labs.ucx.progress.workflows import MigrationProgress
1210
from databricks.labs.ucx.contexts.workflow_task import RuntimeContext
13-
from databricks.labs.ucx.hive_metastore import TablesCrawler
14-
from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler
15-
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationStatusRefresher
16-
from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler
1711

1812

1913
@pytest.mark.parametrize(
20-
"task, crawler, crawler_class",
21-
[
22-
(MigrationProgress.crawl_tables, RuntimeContext.tables_crawler, TablesCrawler),
23-
(MigrationProgress.crawl_udfs, RuntimeContext.udfs_crawler, UdfsCrawler),
24-
(MigrationProgress.crawl_grants, RuntimeContext.grants_crawler, GrantsCrawler),
25-
(MigrationProgress.assess_jobs, RuntimeContext.jobs_crawler, JobsCrawler),
26-
(MigrationProgress.assess_clusters, RuntimeContext.clusters_crawler, ClustersCrawler),
27-
(MigrationProgress.assess_pipelines, RuntimeContext.pipelines_crawler, PipelinesCrawler),
28-
(MigrationProgress.crawl_cluster_policies, RuntimeContext.policies_crawler, PoliciesCrawler),
29-
(
30-
MigrationProgress.refresh_table_migration_status,
31-
RuntimeContext.migration_status_refresher,
32-
TableMigrationStatusRefresher,
33-
),
34-
],
14+
"task, crawler",
15+
(
16+
(MigrationProgress.crawl_tables, RuntimeContext.tables_crawler),
17+
(MigrationProgress.crawl_udfs, RuntimeContext.udfs_crawler),
18+
(MigrationProgress.crawl_grants, RuntimeContext.grants_crawler),
19+
(MigrationProgress.assess_jobs, RuntimeContext.jobs_crawler),
20+
(MigrationProgress.assess_clusters, RuntimeContext.clusters_crawler),
21+
(MigrationProgress.assess_pipelines, RuntimeContext.pipelines_crawler),
22+
(MigrationProgress.crawl_cluster_policies, RuntimeContext.policies_crawler),
23+
(MigrationProgress.refresh_table_migration_status, RuntimeContext.migration_status_refresher),
24+
),
3525
)
36-
def test_migration_progress_runtime_refresh(run_workflow, task, crawler, crawler_class) -> None:
26+
def test_migration_progress_runtime_refresh(run_workflow, task, crawler) -> None:
27+
crawler_class = get_type_hints(crawler.func)["return"]
3728
mock_crawler = create_autospec(crawler_class)
3829
crawler_name = crawler.attrname
3930
run_workflow(task, **{crawler_name: mock_crawler})
4031
mock_crawler.snapshot.assert_called_once_with(force_refresh=True)
4132

4233

34+
@pytest.mark.parametrize(
35+
"task, linter",
36+
(
37+
(MigrationProgress.assess_dashboards, RuntimeContext.query_linter),
38+
(MigrationProgress.assess_workflows, RuntimeContext.workflow_linter),
39+
),
40+
)
41+
def test_linter_runtime_refresh(run_workflow, task, linter) -> None:
42+
linter_class = get_type_hints(linter.func)["return"]
43+
mock_linter = create_autospec(linter_class)
44+
linter_name = linter.attrname
45+
ctx = run_workflow(task, **{linter_name: mock_linter})
46+
mock_linter.refresh_report.assert_called_once_with(ctx.sql_backend, ctx.inventory_database)
47+
48+
4349
def test_migration_progress_with_valid_prerequisites(run_workflow) -> None:
4450
ws = create_autospec(WorkspaceClient)
4551
ws.metastores.current.return_value = MetastoreAssignment(metastore_id="test", workspace_id=123456789)

0 commit comments

Comments
 (0)