Skip to content

Commit fc4ec37

Browse files
authored
Fix sqlglot crasher with 'drop schema ...' statement (#2758)
## Changes
Fix a crash

### Linked issues
None

### Functionality
None

### Tests
- [x] added unit tests

---------

Co-authored-by: Eric Vergnaud <[email protected]>
1 parent e3d34d1 commit fc4ec37

File tree

2 files changed

+58
-32
lines changed

2 files changed

+58
-32
lines changed

src/databricks/labs/ucx/source_code/linters/from_table.py

Lines changed: 50 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import logging
22
from sqlglot import parse as parse_sql
3-
from sqlglot.expressions import Table, Expression, Use, Create
3+
from sqlglot.expressions import Table, Expression, Use, Create, Drop
44
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
5-
from databricks.labs.ucx.source_code.base import Deprecation, CurrentSessionState, SqlLinter, Fixer
5+
from databricks.labs.ucx.source_code.base import Deprecation, CurrentSessionState, SqlLinter, Fixer, Failure
66

77
logger = logging.getLogger(__name__)
88

@@ -43,37 +43,55 @@ def schema(self):
4343

4444
def lint_expression(self, expression: Expression):
4545
for table in expression.find_all(Table):
46-
if isinstance(expression, Use):
47-
# Sqlglot captures the database name in the Use statement as a Table, with
48-
# the schema as the table name.
49-
self._session_state.schema = table.name
50-
continue
51-
if isinstance(expression, Create) and getattr(expression, "kind", None) == "SCHEMA":
52-
# Sqlglot captures the schema name in the Create statement as a Table, with
53-
# the schema as the db name.
54-
self._session_state.schema = table.db
55-
continue
46+
try:
47+
yield from self._unsafe_lint_expression(expression, table)
48+
except Exception as _: # pylint: disable=broad-exception-caught
49+
yield Failure(
50+
code='sql-parse-error',
51+
message=f"Could not parse SQL expression: {expression} ",
52+
# SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159
53+
start_line=0,
54+
start_col=0,
55+
end_line=0,
56+
end_col=1024,
57+
)
5658

57-
# we only migrate tables in the hive_metastore catalog
58-
if self._catalog(table) != 'hive_metastore':
59-
continue
60-
# Sqlglot uses db instead of schema, watch out for that
61-
src_schema = table.db if table.db else self._session_state.schema
62-
if not src_schema:
63-
logger.error(f"Could not determine schema for table {table.name}")
64-
continue
65-
dst = self._index.get(src_schema, table.name)
66-
if not dst:
67-
continue
68-
yield Deprecation(
69-
code='table-migrated-to-uc',
70-
message=f"Table {src_schema}.{table.name} is migrated to {dst.destination()} in Unity Catalog",
71-
# SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159
72-
start_line=0,
73-
start_col=0,
74-
end_line=0,
75-
end_col=1024,
76-
)
59+
def _unsafe_lint_expression(self, expression: Expression, table: Table):
60+
if isinstance(expression, Use):
61+
# Sqlglot captures the database name in the Use statement as a Table, with
62+
# the schema as the table name.
63+
self._session_state.schema = table.name
64+
return
65+
if isinstance(expression, Drop) and getattr(expression, "kind", None) == "SCHEMA":
66+
# Sqlglot captures the schema name in the Drop statement as a Table, with
67+
# the schema as the db name.
68+
return
69+
if isinstance(expression, Create) and getattr(expression, "kind", None) == "SCHEMA":
70+
# Sqlglot captures the schema name in the Create statement as a Table, with
71+
# the schema as the db name.
72+
self._session_state.schema = table.db
73+
return
74+
75+
# we only migrate tables in the hive_metastore catalog
76+
if self._catalog(table) != 'hive_metastore':
77+
return
78+
# Sqlglot uses db instead of schema, watch out for that
79+
src_schema = table.db if table.db else self._session_state.schema
80+
if not src_schema:
81+
logger.error(f"Could not determine schema for table {table.name}")
82+
return
83+
dst = self._index.get(src_schema, table.name)
84+
if not dst:
85+
return
86+
yield Deprecation(
87+
code='table-migrated-to-uc',
88+
message=f"Table {src_schema}.{table.name} is migrated to {dst.destination()} in Unity Catalog",
89+
# SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159
90+
start_line=0,
91+
start_col=0,
92+
end_line=0,
93+
end_col=1024,
94+
)
7795

7896
@staticmethod
7997
def _catalog(table):

tests/unit/source_code/linters/test_from_table.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,14 @@ def test_parses_create_schema(migration_index):
8787
assert not list(advices)
8888

8989

90+
def test_parses_drop_schema(migration_index):
91+
query = "DROP SCHEMA xyz"
92+
session_state = CurrentSessionState(schema="old")
93+
ftf = FromTableSqlLinter(migration_index, session_state=session_state)
94+
advices = ftf.lint(query)
95+
assert not list(advices)
96+
97+
9098
def test_raises_advice_when_parsing_unsupported_sql(migration_index):
9199
query = "XDESCRIBE DETAILS xyz" # not a valid query
92100
session_state = CurrentSessionState(schema="old")

0 commit comments

Comments
 (0)