Skip to content

Commit f5c8029

Browse files
authored
Improve robustness of sqlglot failure handling (#2952)
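This PR makes SQL handling resilient to sqlglot failures: SQL that sqlglot cannot parse is now logged as a warning and yields no results, instead of raising an exception.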
1 parent 69a0cf8 commit f5c8029

File tree

3 files changed

+14
-15
lines changed

3 files changed

+14
-15
lines changed

src/databricks/labs/ucx/source_code/linters/from_table.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def schema(self) -> str:
5353
return self._session_state.schema
5454

5555
def lint_expression(self, expression: Expression) -> Iterable[Deprecation]:
56-
for info in SqlExpression(expression).collect_table_infos("hive_metastore", self._session_state):
56+
for info in SqlExpression(expression).collect_used_tables("hive_metastore", self._session_state):
5757
dst = self._index.get(info.schema_name, info.table_name)
5858
if not dst:
5959
return
@@ -70,7 +70,7 @@ def lint_expression(self, expression: Expression) -> Iterable[Deprecation]:
7070
def collect_tables(self, source_code: str) -> Iterable[UsedTable]:
7171
try:
7272
for info in SqlParser.walk_expressions(
73-
source_code, lambda e: e.collect_table_infos("hive_metastore", self._session_state)
73+
source_code, lambda e: e.collect_used_tables("hive_metastore", self._session_state)
7474
):
7575
if any(pattern.matches(info.table_name) for pattern in DIRECT_FS_ACCESS_PATTERNS):
7676
continue

src/databricks/labs/ucx/source_code/sql/sql_parser.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import logging
2-
from collections.abc import Callable, Iterable, Iterator
2+
from collections.abc import Callable, Iterable
33
from typing import TypeVar
44

55
from sqlglot import parse
@@ -19,8 +19,8 @@ class SqlExpression:
1919
def __init__(self, expression: Expression):
2020
self._expression = expression
2121

22-
def collect_table_infos(self, required_catalog: str, session_state: CurrentSessionState) -> Iterable[UsedTable]:
23-
for table in self._expression.find_all(Table):
22+
def collect_used_tables(self, required_catalog: str, session_state: CurrentSessionState) -> Iterable[UsedTable]:
23+
for table in self.find_all(Table):
2424
info = self._collect_table_info(table, required_catalog, session_state)
2525
if info:
2626
yield info
@@ -62,8 +62,12 @@ def _collect_table_info(
6262
is_write=isinstance(self._expression, (Create, Update, Delete)),
6363
)
6464

65-
def find_all(self, klass: type[E]) -> Iterator[E]:
66-
return self._expression.find_all(klass)
65+
def find_all(self, klass: type[E]) -> Iterable[E]:
66+
try:
67+
return self._expression.find_all(klass)
68+
except SqlglotError as e:
69+
logger.warning(f"Failed to find all {klass} in expression: {self._expression}", exc_info=e)
70+
return []
6771

6872

6973
class SqlParser:
@@ -77,5 +81,4 @@ def walk_expressions(cls, sql_code: str, callback: Callable[[SqlExpression], Ite
7781
continue
7882
yield from callback(SqlExpression(expression))
7983
except SqlglotError as e:
80-
logger.debug(f"Failed to parse SQL: {sql_code}", exc_info=e)
81-
raise e
84+
logger.warning(f"Failed to parse SQL: {sql_code}", exc_info=e)
Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
1-
import pytest
2-
from sqlglot import ParseError
3-
41
from databricks.labs.ucx.source_code.sql.sql_parser import SqlParser
52

63

7-
def test_raises_exception_with_unsupported_sql() -> None:
8-
with pytest.raises(ParseError):
9-
list(SqlParser.walk_expressions("XSELECT * from nowhere", lambda _: []))
4+
def test_does_not_raise_exception_with_unsupported_sql() -> None:
5+
assert len(list(SqlParser.walk_expressions("XSELECT * from nowhere", lambda _: []))) == 0

0 commit comments

Comments
 (0)