Skip to content

Commit 07188c7

Browse files
authored
Handle non-UTF-8 encoded notebook error explicitly (#3376)
## Changes

Handle non-UTF-8 encoded notebook error explicitly

### Linked issues

Resolves #3374

### Functionality

- [x] modified existing workflow: `assessment`

### Tests

- [x] added unit tests
1 parent 438ffc8 commit 07188c7

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

src/databricks/labs/ucx/source_code/notebooks/loaders.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So
7373
exc_info=True,
7474
)
7575
return None
76+
except UnicodeDecodeError:
77+
logger.warning(f"Cannot decode non-UTF-8 encoded notebook from workspace: {absolute_path}")
78+
return None
7679
language = self._detect_language(absolute_path, content)
7780
if not language:
7881
logger.warning(f"Could not detect language for {absolute_path}")

tests/unit/source_code/notebooks/test_loader.py renamed to tests/unit/source_code/notebooks/test_loaders.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from pathlib import Path
33
from unittest.mock import create_autospec
44

5+
import pytest
56
from databricks.sdk.service.workspace import Language
67

78
from databricks.labs.ucx.source_code.graph import Dependency
@@ -24,9 +25,19 @@ def detect_language(cls, path: Path, content: str):
2425
assert not NotebookLoaderForTesting.detect_language(Path("hi"), "stuff")
2526

2627

27-
def test_notebook_loader_loads_dependency_with_permission_error(caplog) -> None:
28+
@pytest.mark.parametrize(
29+
"error, message",
30+
[
31+
(PermissionError("Permission denied"), "Permission error while reading notebook from workspace"),
32+
(
33+
UnicodeDecodeError("utf-8", b"\x80\x81\x82", 0, 1, "invalid start byte"),
34+
"Cannot decode non-UTF-8 encoded notebook from workspace",
35+
),
36+
],
37+
)
38+
def test_notebook_loader_loads_dependency_raises_error(caplog, error: Exception, message: str) -> None:
2839
path = create_autospec(Path)
29-
path.read_text.side_effect = PermissionError("Permission denied")
40+
path.read_text.side_effect = error
3041
path_lookup = create_autospec(PathLookup)
3142
path_lookup.resolve.return_value = path
3243
dependency = create_autospec(Dependency)
@@ -35,5 +46,5 @@ def test_notebook_loader_loads_dependency_with_permission_error(caplog) -> None:
3546
with caplog.at_level(logging.WARNING, logger="databricks.labs.ucx.source_code.notebooks.loaders"):
3647
found = NotebookLoader().load_dependency(path_lookup, dependency)
3748

38-
assert f"Permission error while reading notebook from workspace: {path}" in caplog.text
49+
assert f"{message}: {path}" in caplog.text
3950
assert found is None

0 commit comments

Comments
 (0)