Skip to content

Commit 6dfcaaf

Browse files
asnare, ericvergnaud, nfx
authored
Improve catch-all handling and avoid some pylint suppressions (#1919)
## Changes

This PR is stacked on top of #1905; changes include:

- Avoid some pylint suppressions that aren't really necessary.
- Improve logging when catch-all handlers are hit so we can better understand what went wrong.
- When an internal error occurs during magic-processing, work already completed is returned instead of abandoned.

Incidental changes include some more type annotations amongst the unit tests.

### Tests

- manually tested
- added unit tests

---------

Co-authored-by: Eric Vergnaud <[email protected]>
Co-authored-by: Eric Vergnaud <[email protected]>
Co-authored-by: Serge Smertin <[email protected]>
1 parent 65fdd02 commit 6dfcaaf

File tree

4 files changed

+77
-8
lines changed

4 files changed

+77
-8
lines changed

src/databricks/labs/ucx/source_code/notebooks/cells.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from pathlib import Path
1111

1212
from astroid import Call, Const, ImportFrom, Name, NodeNG # type: ignore
13+
from astroid.exceptions import AstroidSyntaxError # type: ignore
1314
from sqlglot import parse as parse_sql, ParseError as SQLParseError
1415

1516
from databricks.sdk.service.workspace import Language
@@ -403,7 +404,8 @@ def build_graph_from_python_source(self, python_code: str) -> list[DependencyPro
403404
problems: list[DependencyProblem] = []
404405
try:
405406
tree = Tree.normalize_and_parse(python_code)
406-
except Exception as e: # pylint: disable=broad-except
407+
except AstroidSyntaxError as e:
408+
logger.debug(f"Could not parse Python code: {python_code}", exc_info=True)
407409
problems.append(DependencyProblem('parse-error', f"Could not parse Python code: {e}"))
408410
return problems
409411
syspath_changes = SysPathChange.extract_from_tree(self._context.session_state, tree)
@@ -485,11 +487,11 @@ def extract_from_tree(
485487
nodes = tree.locate(Call, [("magic_command", Name)])
486488
for command in cls._make_commands_for_magic_command_call_nodes(nodes):
487489
commands.append(command)
488-
return commands, problems
489490
except Exception as e: # pylint: disable=broad-except
491+
logger.debug(f"Internal error while checking magic commands in tree: {tree.root}", exc_info=True)
490492
problem = problem_factory('internal-error', f"While checking magic commands: {e}", tree.root)
491493
problems.append(problem)
492-
return [], problems
494+
return commands, problems
493495

494496
@classmethod
495497
def _make_commands_for_magic_command_call_nodes(cls, nodes: list[Call]):
@@ -527,8 +529,11 @@ def build_dependency_graph(self, graph: DependencyGraph) -> list[DependencyProbl
527529
return [DependencyProblem("library-install-failed", "Missing arguments after 'pip install'")]
528530
return graph.register_library(*argv[2:]) # Skipping %pip install
529531

530-
@staticmethod
531-
def _split(code) -> list[str]:
532+
# Cache re-used regex (and ensure issues are raised during class init instead of upon first use).
533+
_splitter = re.compile(r"(?<!\\)\n")
534+
535+
@classmethod
536+
def _split(cls, code: str) -> list[str]:
532537
"""Split pip cell code into multiple arguments
533538
534539
Note:
@@ -537,7 +542,7 @@ def _split(code) -> list[str]:
537542
Sources:
538543
https://docs.databricks.com/en/libraries/notebooks-python-libraries.html#manage-libraries-with-pip-commands
539544
"""
540-
match = re.search(r"(?<!\\)\n", code)
545+
match = cls._splitter.search(code)
541546
if match:
542547
code = code[: match.start()] # Remove code after non-escaped newline
543548
code = code.replace("\\\n", " ")

tests/unit/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def __repr__(self):
9797

9898

9999
@pytest.fixture
100-
def mock_path_lookup():
100+
def mock_path_lookup() -> PathLookup:
101101
return MockPathLookup()
102102

103103

tests/unit/source_code/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from databricks.labs.ucx.source_code.known import KnownList
99
from databricks.labs.ucx.source_code.linters.files import ImportFileResolver, FileLoader
1010
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader, NotebookResolver
11+
from databricks.labs.ucx.source_code.path_lookup import PathLookup
1112
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
1213

1314

@@ -48,7 +49,7 @@ def extended_test_index():
4849

4950

5051
@pytest.fixture
51-
def simple_dependency_resolver(mock_path_lookup):
52+
def simple_dependency_resolver(mock_path_lookup: PathLookup) -> DependencyResolver:
5253
allow_list = KnownList()
5354
library_resolver = PythonLibraryResolver(allow_list)
5455
notebook_resolver = NotebookResolver(NotebookLoader())

tests/unit/source_code/notebooks/test_cells.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,15 @@
1010
from databricks.labs.ucx.source_code.linters.files import FileLoader, ImportFileResolver
1111
from databricks.labs.ucx.source_code.linters.python_ast import Tree
1212
from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage, PipCell, PipMagic, MagicCommand
13+
from databricks.labs.ucx.source_code.notebooks.cells import (
14+
GraphBuilder,
15+
PythonCell,
16+
)
1317
from databricks.labs.ucx.source_code.notebooks.loaders import (
1418
NotebookResolver,
1519
NotebookLoader,
1620
)
21+
from databricks.labs.ucx.source_code.path_lookup import PathLookup
1722
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
1823
from databricks.labs.ucx.source_code.known import KnownList
1924

@@ -167,6 +172,58 @@ def test_pip_cell_build_dependency_graph_handles_multiline_code():
167172
graph.register_library.assert_called_once_with("databricks")
168173

169174

175+
def test_graph_builder_parse_error(
    simple_dependency_resolver: DependencyResolver, mock_path_lookup: PathLookup
) -> None:
    """Verify that unparseable Python source is reported as a 'parse-error' problem."""
    # Fixture: a dependency graph rooted at an empty path, and a builder bound to it.
    root_dependency = Dependency(FileLoader(), Path(""))
    session_state = CurrentSessionState()
    graph = DependencyGraph(root_dependency, None, simple_dependency_resolver, mock_path_lookup, session_state)
    graph.new_graph_builder_context()
    builder = GraphBuilder(graph.new_graph_builder_context())

    # Run the test: feed the builder text that is not valid Python.
    reported = builder.build_graph_from_python_source("this is not valid python")

    # Check results: at least one problem must describe the parse failure.
    assert any(
        item.code == "parse-error" and item.message.startswith("Could not parse Python code") for item in reported
    )
194+
195+
196+
def test_parses_python_cell_with_magic_commands(simple_dependency_resolver, mock_path_lookup):
    """Verify that a Python cell mixing plain statements with a %pip line yields no problems."""
    # The cell source starts and ends with a newline, with the magic line between two assignments.
    source = (
        "\n"
        "a = 'something'\n"
        "%pip install databricks\n"
        "b = 'else'\n"
    )
    python_cell = PythonCell(source, original_offset=1)
    root_dependency = Dependency(FileLoader(), Path(""))
    session_state = CurrentSessionState()
    graph = DependencyGraph(root_dependency, None, simple_dependency_resolver, mock_path_lookup, session_state)
    reported = python_cell.build_dependency_graph(graph)
    assert not reported
207+
208+
209+
@pytest.mark.xfail(reason="Line-magic as an expression is not supported ", strict=True)
def test_python_cell_with_expression_magic(
    simple_dependency_resolver: DependencyResolver, mock_path_lookup: PathLookup
) -> None:
    """Line magic (%) may appear where an expression is expected; the cell should build without problems.

    The test is marked as a strict expected failure.
    """
    # Fixture: a cell that assigns the result of a line magic to a variable.
    magic_cell = PythonCell("current_directory = %pwd", original_offset=1)
    root_dependency = Dependency(FileLoader(), Path(""))
    session_state = CurrentSessionState()
    graph = DependencyGraph(root_dependency, None, simple_dependency_resolver, mock_path_lookup, session_state)

    # Run the test
    reported = magic_cell.build_dependency_graph(graph)

    # Verify there were no problems.
    assert not reported
225+
226+
170227
@pytest.mark.parametrize(
171228
"code,split",
172229
[
@@ -193,6 +250,12 @@ def test_pip_cell_build_dependency_graph_handles_multiline_code():
193250
],
194251
)
195252
def test_pip_magic_split(code, split):
    """Check that PipMagic splits pip cell code into the expected list of arguments.

    Args:
        code: the pip cell source supplied by the parametrization.
        split: the argument list the source is expected to split into.
    """

    # Avoid direct protected access to the _split method: a subclass may call
    # it legitimately, so it is exposed through a public, test-only wrapper.
    class _PipMagicFriend(PipMagic):
        @classmethod
        def split(cls, source: str) -> list[str]:
            return cls._split(source)

    # Go through the wrapper so that no pylint suppression is needed here.
    assert _PipMagicFriend.split(code) == split
197260

198261

0 commit comments

Comments
 (0)