Skip to content

Commit 2fd066a

Browse files
authored
Fixed errors related to unsupported cell languages (#3026)
## Changes
Our current implementation logs warnings when encountering notebook cells that are neither Python nor SQL. This PR fixes that.

### Linked issues
Resolves #2977

### Functionality
None

### Tests
- [x] manually tested
- [x] added unit tests

Co-authored-by: Eric Vergnaud <[email protected]>
1 parent 07acc0b commit 2fd066a

File tree

2 files changed

+53
-10
lines changed

2 files changed

+53
-10
lines changed

src/databricks/labs/ucx/source_code/jobs.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import shutil
55
import tempfile
66
from abc import ABC, abstractmethod
7-
from collections.abc import Generator, Iterable
7+
from collections.abc import Generator, Iterable, Callable
88
from contextlib import contextmanager
99
from dataclasses import dataclass
1010
from datetime import datetime, timezone
@@ -651,14 +651,15 @@ def _collect_from_source(
651651
path: Path,
652652
inherited_tree: Tree | None,
653653
) -> Iterable[T]:
654-
iterable: Iterable[T] | None = None
655-
if language is CellLanguage.SQL:
656-
iterable = self._collect_from_sql(source)
657654
if language is CellLanguage.PYTHON:
658655
iterable = self._collect_from_python(source, inherited_tree)
659-
if iterable is None:
660-
logger.warning(f"Language {language.name} not supported yet!")
661-
return
656+
else:
657+
fn: Callable[[str], Iterable[T]] | None = getattr(self, f"_collect_from_{language.name.lower()}", None)
658+
if not fn:
659+
raise ValueError(f"Language {language.name} not supported yet!")
660+
# the below is for disabling a false pylint positive
661+
# pylint: disable=not-callable
662+
iterable = fn(source)
662663
src_timestamp = datetime.fromtimestamp(path.stat().st_mtime, timezone.utc)
663664
src_id = str(path)
664665
for item in iterable:
@@ -667,8 +668,28 @@ def _collect_from_source(
667668
@abstractmethod
668669
def _collect_from_python(self, source: str, inherited_tree: Tree | None) -> Iterable[T]: ...
669670

670-
@abstractmethod
671-
def _collect_from_sql(self, source: str) -> Iterable[T]: ...
671+
def _collect_from_sql(self, _source: str) -> Iterable[T]:
672+
return []
673+
674+
def _collect_from_r(self, _source: str) -> Iterable[T]:
675+
logger.warning("Language R not supported yet!")
676+
return []
677+
678+
def _collect_from_scala(self, _source: str) -> Iterable[T]:
679+
logger.warning("Language scala not supported yet!")
680+
return []
681+
682+
def _collect_from_shell(self, _source: str) -> Iterable[T]:
683+
return []
684+
685+
def _collect_from_markdown(self, _source: str) -> Iterable[T]:
686+
return []
687+
688+
def _collect_from_run(self, _source: str) -> Iterable[T]:
689+
return []
690+
691+
def _collect_from_pip(self, _source: str) -> Iterable[T]:
692+
return []
672693

673694

674695
class DfsaCollectorWalker(_CollectorWalker[DirectFsAccess]):

tests/unit/source_code/linters/test_directfs.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
1+
from collections.abc import Iterable
2+
from pathlib import Path
3+
from unittest.mock import create_autospec
4+
15
import pytest
26

3-
from databricks.labs.ucx.source_code.base import Deprecation, Advice, CurrentSessionState, Failure
7+
from databricks.labs.ucx.source_code.base import Deprecation, Advice, CurrentSessionState, Failure, DirectFsAccess
8+
from databricks.labs.ucx.source_code.graph import DependencyGraph
9+
from databricks.labs.ucx.source_code.jobs import DfsaCollectorWalker
410
from databricks.labs.ucx.source_code.linters.directfs import (
511
DIRECT_FS_ACCESS_PATTERNS,
612
DirectFsAccessPyLinter,
713
DirectFsAccessSqlLinter,
814
)
15+
from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage
916

1017

1118
@pytest.mark.parametrize(
@@ -145,3 +152,18 @@ def test_dfsa_queries_failure(query: str) -> None:
145152
end_col=1024,
146153
),
147154
]
155+
156+
157+
class _TestCollectorWalker(DfsaCollectorWalker):
158+
# inherit from DfsaCollectorWalker because it's public
159+
160+
def collect_from_source(self, language: CellLanguage) -> Iterable[DirectFsAccess]:
161+
return self._collect_from_source("empty", language, Path(""), None)
162+
163+
164+
@pytest.mark.parametrize("language", list(iter(CellLanguage)))
165+
def test_collector_supports_all_cell_languages(language, mock_path_lookup, migration_index):
166+
graph = create_autospec(DependencyGraph)
167+
graph.assert_not_called()
168+
collector = _TestCollectorWalker(graph, set(), mock_path_lookup, CurrentSessionState(), migration_index)
169+
list(collector.collect_from_source(language))

0 commit comments

Comments
 (0)