55from unittest .mock import create_autospec
66
77import pytest
8+ from databricks .labs .blueprint .paths import DBFSPath , WorkspacePath
89from databricks .labs .lsql .backends import MockBackend
9- from databricks .sdk .service .compute import LibraryInstallStatus
10+ from databricks .sdk import WorkspaceClient
11+ from databricks .sdk .errors import NotFound
12+ from databricks .sdk .service import compute , jobs
1013from databricks .sdk .service .jobs import Job , SparkPythonTask
11- from databricks .sdk .service .pipelines import NotebookLibrary , GetPipelineResponse , PipelineLibrary , FileLibrary
14+ from databricks .sdk .service .pipelines import (
15+ GetPipelineResponse ,
16+ FileLibrary ,
17+ NotebookLibrary ,
18+ PipelineLibrary ,
19+ PipelineSpec ,
20+ )
21+ from databricks .sdk .service .workspace import ExportFormat , Language , ObjectInfo
1222
13- from databricks .labs .blueprint . paths import DBFSPath , WorkspacePath
23+ from databricks .labs .ucx . assessment . jobs import JobsCrawler
1424from databricks .labs .ucx .source_code .base import CurrentSessionState
1525from databricks .labs .ucx .source_code .directfs_access import DirectFsAccessCrawler
16- from databricks .labs .ucx .source_code .python_libraries import PythonLibraryResolver
17- from databricks .sdk import WorkspaceClient
18- from databricks .sdk .errors import NotFound
19- from databricks .sdk .service import compute , jobs , pipelines
20- from databricks .sdk .service .workspace import ExportFormat , ObjectInfo , Language
21-
2226from databricks .labs .ucx .source_code .files import FileLoader , ImportFileResolver
2327from databricks .labs .ucx .source_code .graph import (
2428 Dependency ,
2731)
2832from databricks .labs .ucx .source_code .jobs import JobProblem , WorkflowTaskContainer
2933from databricks .labs .ucx .source_code .linters .jobs import WorkflowLinter
30- from databricks .labs .ucx .source_code .notebooks .loaders import NotebookResolver , NotebookLoader
34+ from databricks .labs .ucx .source_code .notebooks .loaders import NotebookLoader , NotebookResolver
35+ from databricks .labs .ucx .source_code .python_libraries import PythonLibraryResolver
3136from databricks .labs .ucx .source_code .used_table import UsedTablesCrawler
3237
3338
@@ -228,10 +233,17 @@ def test_workflow_linter_lint_job_logs_problems(dependency_resolver, mock_path_l
228233 expected_message = "Found job problems:\n UNKNOWN:-1 [library-install-failed] 'pip --disable-pip-version-check install unknown-library"
229234
230235 ws = create_autospec (WorkspaceClient )
236+ jobs_crawler = create_autospec (JobsCrawler )
231237 directfs_crawler = create_autospec (DirectFsAccessCrawler )
232238 used_tables_crawler = create_autospec (UsedTablesCrawler )
233239 linter = WorkflowLinter (
234- ws , dependency_resolver , mock_path_lookup , empty_index , directfs_crawler , used_tables_crawler
240+ ws ,
241+ jobs_crawler ,
242+ dependency_resolver ,
243+ mock_path_lookup ,
244+ empty_index ,
245+ directfs_crawler ,
246+ used_tables_crawler ,
235247 )
236248
237249 libraries = [compute .Library (pypi = compute .PythonPyPiLibrary (package = "unknown-library-name" ))]
@@ -243,6 +255,7 @@ def test_workflow_linter_lint_job_logs_problems(dependency_resolver, mock_path_l
243255 with caplog .at_level (logging .WARNING , logger = "databricks.labs.ucx.source_code.jobs" ):
244256 linter .lint_job (1234 )
245257
258+ jobs_crawler .assert_not_called () # Only called through refresh_report
246259 directfs_crawler .assert_not_called ()
247260 used_tables_crawler .assert_not_called ()
248261 assert any (message .startswith (expected_message ) for message in caplog .messages ), caplog .messages
@@ -326,7 +339,7 @@ def test_workflow_task_container_with_existing_cluster_builds_dependency_graph_p
326339 whl = None ,
327340 ),
328341 messages = None ,
329- status = LibraryInstallStatus .PENDING ,
342+ status = compute . LibraryInstallStatus .PENDING ,
330343 )
331344 ]
332345
@@ -446,7 +459,7 @@ def test_workflow_linter_dlt_pipeline_task(graph) -> None:
446459 ws .pipelines .get .return_value = GetPipelineResponse (
447460 pipeline_id = pipeline .pipeline_id ,
448461 name = "test-pipeline" ,
449- spec = pipelines . PipelineSpec (continuous = False ),
462+ spec = PipelineSpec (continuous = False ),
450463 )
451464
452465 workflow_task_container = WorkflowTaskContainer (ws , task , Job ())
@@ -456,7 +469,7 @@ def test_workflow_linter_dlt_pipeline_task(graph) -> None:
456469 ws .pipelines .get .return_value = GetPipelineResponse (
457470 pipeline_id = pipeline .pipeline_id ,
458471 name = "test-pipeline" ,
459- spec = pipelines . PipelineSpec (
472+ spec = PipelineSpec (
460473 libraries = [
461474 PipelineLibrary (
462475 jar = "some.jar" ,
@@ -549,19 +562,21 @@ def test_workflow_linter_refresh_report(dependency_resolver, mock_path_lookup, m
549562 ws .jobs .get .return_value = Job (job_id = 2 , settings = settings )
550563
551564 sql_backend = MockBackend ()
565+ jobs_crawler = create_autospec (JobsCrawler )
552566 directfs_crawler = DirectFsAccessCrawler .for_paths (sql_backend , "test" )
553567 used_tables_crawler = UsedTablesCrawler .for_paths (sql_backend , "test" )
554568 linter = WorkflowLinter (
555569 ws ,
570+ jobs_crawler ,
556571 dependency_resolver ,
557572 mock_path_lookup ,
558573 migration_index ,
559574 directfs_crawler ,
560575 used_tables_crawler ,
561- [1 ],
562576 )
563577 linter .refresh_report (sql_backend , 'test' )
564578
579+ jobs_crawler .snapshot .assert_called_once ()
565580 sql_backend .has_rows_written_for ('test.workflow_problems' )
566581 sql_backend .has_rows_written_for ('hive_metastore.test.used_tables_in_paths' )
567582 sql_backend .has_rows_written_for ('hive_metastore.test.directfs_in_paths' )
0 commit comments