Skip to content

Commit a3db6ce

Browse files
authored
Lint child dependencies recursively (#2226)
## Changes The current code lints local files without considering parent/child relationships (where a given 'parent' file imports or runs a 'child' file). This makes it impossible to lint files in context, where a child file uses variables inherited from a parent file. This PR is progress towards that, by linting child dependencies recursively. ### Linked issues Progresses #2155 Progresses #2156 ### Functionality - [x] modified existing command: `databricks labs ucx lint-local-code` ### Tests - [x] checked integration tests --------- Co-authored-by: Eric Vergnaud <[email protected]>
1 parent 1777db2 commit a3db6ce

File tree

5 files changed

+85
-26
lines changed

5 files changed

+85
-26
lines changed

src/databricks/labs/ucx/source_code/graph.py

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,10 @@ def check_registered_dependency(graph):
110110
def root(self):
111111
return self if self._parent is None else self._parent.root
112112

113+
@property
114+
def parent(self):
115+
return self._parent
116+
113117
@property
114118
def all_dependencies(self) -> set[Dependency]:
115119
dependencies: set[Dependency] = set()
@@ -123,6 +127,28 @@ def add_to_dependencies(graph: DependencyGraph) -> bool:
123127
self.visit(add_to_dependencies, set())
124128
return dependencies
125129

130+
@property
131+
def root_dependencies(self) -> set[Dependency]:
132+
roots: set[Dependency] = set()
133+
children: set[Dependency] = set()
134+
135+
def add_to_dependencies(graph: DependencyGraph) -> bool:
136+
dependency = graph.dependency
137+
if dependency in children:
138+
return False # Nothing to do
139+
if dependency in roots:
140+
roots.remove(dependency)
141+
children.add(dependency)
142+
return False
143+
if graph.parent is None:
144+
roots.add(dependency)
145+
return False
146+
children.add(dependency)
147+
return False
148+
149+
self.visit(add_to_dependencies, set())
150+
return roots
151+
126152
@property
127153
def local_dependencies(self) -> set[Dependency]:
128154
return {child.dependency for child in self._dependencies.values()}
@@ -136,17 +162,27 @@ def all_paths(self) -> set[Path]:
136162

137163
def all_relative_names(self) -> set[str]:
138164
"""This method is intended to simplify testing"""
139-
all_names = set[str]()
140-
dependencies = self.all_dependencies
141-
for library_root in self._path_lookup.library_roots:
142-
for dependency in dependencies:
165+
return self._relative_names(self.all_dependencies)
166+
167+
def _relative_names(self, dependencies: set[Dependency]) -> set[str]:
168+
"""This method is intended to simplify testing"""
169+
names = set[str]()
170+
for dependency in dependencies:
171+
for library_root in self._path_lookup.library_roots:
143172
if not dependency.path.is_relative_to(library_root):
144173
continue
145174
relative_path = dependency.path.relative_to(library_root).as_posix()
146175
if relative_path == ".":
147176
continue
148-
all_names.add(relative_path)
149-
return all_names
177+
names.add(relative_path)
178+
return names
179+
180+
def root_paths(self) -> set[Path]:
181+
return {d.path for d in self.root_dependencies}
182+
183+
def root_relative_names(self) -> set[str]:
184+
"""This method is intended to simplify testing"""
185+
return self._relative_names(self.root_dependencies)
150186

151187
# when visit_node returns True it interrupts the visit
152188
def visit(self, visit_node: Callable[[DependencyGraph], bool | None], visited: set[Path]) -> bool:

src/databricks/labs/ucx/source_code/linters/files.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ def __init__(
105105
def lint(
106106
self,
107107
prompts: Prompts,
108+
linted_paths: set[Path],
108109
path: Path | None,
109110
stdout: TextIO = sys.stdout,
110111
) -> list[LocatedAdvice]:
@@ -116,13 +117,13 @@ def lint(
116117
validate=lambda p_: Path(p_).exists(),
117118
)
118119
path = Path(response)
119-
located_advices = list(self.lint_path(path))
120+
located_advices = list(self.lint_path(path, linted_paths))
120121
for located in located_advices:
121122
message = located.message_relative_to(path)
122123
stdout.write(f"{message}\n")
123124
return located_advices
124125

125-
def lint_path(self, path: Path) -> Iterable[LocatedAdvice]:
126+
def lint_path(self, path: Path, linted_paths: set[Path]) -> Iterable[LocatedAdvice]:
126127
loader = self._folder_loader if path.is_dir() else self._file_loader
127128
dependency = Dependency(loader, path)
128129
graph = DependencyGraph(dependency, None, self._dependency_resolver, self._path_lookup, self._session_state)
@@ -132,15 +133,26 @@ def lint_path(self, path: Path) -> Iterable[LocatedAdvice]:
132133
for problem in problems:
133134
problem_path = Path('UNKNOWN') if problem.is_path_missing() else problem.source_path.absolute()
134135
yield problem.as_advisory().for_path(problem_path)
135-
for child_path in graph.all_paths:
136-
yield from self._lint_one(child_path)
137-
138-
def _lint_one(self, path: Path) -> Iterable[LocatedAdvice]:
139-
if path.is_dir():
140-
return []
141-
ctx = self._new_linter_context()
142-
linter = FileLinter(ctx, self._path_lookup, self._session_state, path)
143-
return [advice.for_path(path) for advice in linter.lint()]
136+
for dependency in graph.root_dependencies:
137+
yield from self._lint_one(dependency, graph, linted_paths)
138+
139+
def _lint_one(
140+
self, dependency: Dependency, graph: DependencyGraph, linted_paths: set[Path]
141+
) -> Iterable[LocatedAdvice]:
142+
if dependency.path in linted_paths:
143+
return
144+
linted_paths.add(dependency.path)
145+
if dependency.path.is_file():
146+
ctx = self._new_linter_context()
147+
linter = FileLinter(ctx, self._path_lookup, self._session_state, dependency.path)
148+
for advice in linter.lint():
149+
yield advice.for_path(dependency.path)
150+
maybe_graph = graph.locate_dependency(dependency.path)
151+
# problems have already been reported while building the graph
152+
if maybe_graph.graph:
153+
child_graph = maybe_graph.graph
154+
for child_dependency in child_graph.local_dependencies:
155+
yield from self._lint_one(child_dependency, child_graph, linted_paths)
144156

145157

146158
class LocalFileMigrator:

tests/integration/source_code/test_jobs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ def test_lint_local_code(simple_ctx):
198198
light_ctx.dependency_resolver,
199199
lambda: linter_context,
200200
)
201-
problems = linter.lint(Prompts(), path_to_scan, StringIO())
201+
problems = linter.lint(Prompts(), set(), path_to_scan, StringIO())
202202
assert len(problems) > 0
203203

204204

tests/unit/source_code/linters/test_files.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,9 @@ def test_linter_walks_directory(mock_path_lookup, migration_index):
122122
linter = LocalCodeLinter(
123123
file_loader, folder_loader, mock_path_lookup, session_state, resolver, lambda: LinterContext(migration_index)
124124
)
125-
advices = linter.lint(prompts, None)
125+
paths: set[Path] = set()
126+
advices = linter.lint(prompts, paths, None)
127+
assert len(paths) > 10
126128
assert not advices
127129

128130

@@ -181,6 +183,7 @@ def test_known_issues(path: Path, migration_index):
181183
resolver,
182184
lambda: LinterContext(migration_index, session_state),
183185
)
184-
advices = linter.lint(MockPrompts({}), path)
186+
linted: set[Path] = set()
187+
advices = linter.lint(MockPrompts({}), linted, path)
185188
for advice in advices:
186189
print(repr(advice))

tests/unit/source_code/test_dependencies.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ def test_dependency_resolver_visits_workspace_notebook_dependencies(simple_depen
3434
assert maybe.graph.all_relative_names() == {"root3.run.py", "root1.run.py", "leaf1.py", "leaf2.py"}
3535

3636

37+
def test_dependency_resolver_locates_root_dependencies(simple_dependency_resolver):
38+
maybe = simple_dependency_resolver.build_notebook_dependency_graph(Path("root3.run.py"), CurrentSessionState())
39+
assert not maybe.failed
40+
assert maybe.graph.root_relative_names() == {"root3.run.py"}
41+
42+
3743
def test_dependency_resolver_visits_local_notebook_dependencies(simple_dependency_resolver):
3844
maybe = simple_dependency_resolver.build_notebook_dependency_graph(Path("root4.py"), CurrentSessionState())
3945
assert not maybe.failed
@@ -52,7 +58,7 @@ def test_dependency_resolver_visits_workspace_file_dependencies(simple_dependenc
5258
assert maybe.graph.all_relative_names() == {'leaf1.py', 'leaf2.py', 'root8.py'}
5359

5460

55-
def test_dependency_resolver_raises_problem_with_unfound_workspace_notebook_dependency(simple_dependency_resolver):
61+
def test_dependency_resolver_raises_problem_with_unresolved_workspace_notebook_dependency(simple_dependency_resolver):
5662
maybe = simple_dependency_resolver.build_notebook_dependency_graph(
5763
Path("root1-no-leaf.run.py"), CurrentSessionState()
5864
)
@@ -69,7 +75,7 @@ def test_dependency_resolver_raises_problem_with_unfound_workspace_notebook_depe
6975
]
7076

7177

72-
def test_dependency_resolver_raises_problem_with_unfound_local_notebook_dependency(simple_dependency_resolver):
78+
def test_dependency_resolver_raises_problem_with_unresolved_local_notebook_dependency(simple_dependency_resolver):
7379
maybe = simple_dependency_resolver.build_notebook_dependency_graph(Path("root4-no-leaf.py"), CurrentSessionState())
7480
assert list(maybe.problems) == [
7581
DependencyProblem(
@@ -142,14 +148,14 @@ def test_dependency_resolver_terminates_at_known_libraries(empty_index, mock_not
142148
assert maybe.failed
143149

144150

145-
def test_dependency_resolver_raises_problem_with_unfound_root_file(simple_dependency_resolver):
151+
def test_dependency_resolver_raises_problem_with_unresolved_root_file(simple_dependency_resolver):
146152
maybe = simple_dependency_resolver.build_local_file_dependency_graph(Path("non-existing.py"), CurrentSessionState())
147153
assert list(maybe.problems) == [
148154
DependencyProblem('file-not-found', 'File not found: non-existing.py', Path("non-existing.py"))
149155
]
150156

151157

152-
def test_dependency_resolver_raises_problem_with_unfound_root_notebook(simple_dependency_resolver):
158+
def test_dependency_resolver_raises_problem_with_unresolved_root_notebook(simple_dependency_resolver):
153159
maybe = simple_dependency_resolver.build_notebook_dependency_graph(Path("unknown_notebook"), CurrentSessionState())
154160
assert list(maybe.problems) == [
155161
DependencyProblem('notebook-not-found', 'Notebook not found: unknown_notebook', Path("unknown_notebook"))
@@ -183,8 +189,10 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So
183189

184190
notebook_loader = FailingNotebookLoader()
185191
notebook_resolver = NotebookResolver(notebook_loader)
186-
pip_resolver = PythonLibraryResolver(KnownList())
187-
resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup)
192+
known_list = KnownList()
193+
pip_resolver = PythonLibraryResolver(known_list)
194+
import_resolver = ImportFileResolver(FileLoader(), known_list)
195+
resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, mock_path_lookup)
188196
maybe = resolver.build_notebook_dependency_graph(Path("root5.py"), CurrentSessionState())
189197
assert list(maybe.problems) == [
190198
DependencyProblem('cannot-load-notebook', 'Could not load notebook root5.py', Path('<MISSING_SOURCE_PATH>'))

0 commit comments

Comments
 (0)