Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions sqlglot/optimizer/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,21 @@ def _get_unambiguous_columns(
# Performance optimization - avoid copying first_columns if there is only one table.
return SingleValuedMapping(first_columns, first_table)

# For BigQuery UNNEST_COLUMN_ONLY, build a mapping of original UNNEST aliases
# from alias.columns[0] to their source names. This is used to resolve shadowing
# where an UNNEST alias shadows a column name from another table.
unnest_original_aliases: t.Dict[str, str] = {}
if self.dialect.UNNEST_COLUMN_ONLY:
unnest_original_aliases = {
alias_arg.columns[0].name: source_name
for source_name, source in self.scope.sources.items()
if (
isinstance(source.expression, exp.Unnest)
and (alias_arg := source.expression.args.get("alias"))
and alias_arg.columns
)
}

unambiguous_columns = {col: first_table for col in first_columns}
all_columns = set(unambiguous_columns)

Expand All @@ -314,6 +329,10 @@ def _get_unambiguous_columns(
all_columns.update(columns)

for column in ambiguous:
if column in unnest_original_aliases:
unambiguous_columns[column] = unnest_original_aliases[column]
continue

unambiguous_columns.pop(column, None)
for column in unique.difference(ambiguous):
unambiguous_columns[column] = table
Expand Down
24 changes: 24 additions & 0 deletions tests/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1917,6 +1917,30 @@ def test_struct_annotation_bigquery(self):

assert annotated.selects[0].type == exp.DataType.build("VARCHAR")

def test_bigquery_unnest_alias_shadowing(self):
    """Qualify a BigQuery query whose UNNEST alias matches a joined table's column name.

    The query aliases the UNNEST source as ``timeline_date`` and joins
    ``production_tier``, whose schema also has a ``timeline_date`` column.
    The expected output keeps the bare ``timeline_date`` references
    unqualified, while the explicitly qualified
    ``production_tier.timeline_date`` stays attached to its table.
    """
    # The rendered SQL also carries an explicit INTERVAL '1' DAY argument on
    # GENERATE_DATE_ARRAY that the input query does not spell out.
    expected_sql = (
        "SELECT `timeline_date` AS `timeline_date` "
        "FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03', INTERVAL '1' DAY)) AS `timeline_date` "
        "LEFT JOIN `production_tier` AS `production_tier` "
        "ON `production_tier`.`timeline_date` = `timeline_date`"
    )

    # Same column name as the UNNEST alias, which is what sets up the clash.
    table_schema = {"production_tier": {"timeline_date": "DATE", "id": "INT"}}
    query = """
SELECT timeline_date
FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03')) AS timeline_date
LEFT JOIN production_tier ON production_tier.timeline_date = timeline_date
"""

    parsed = parse_one(query, dialect="bigquery")
    qualified = optimizer.qualify.qualify(parsed, schema=table_schema, dialect="bigquery")

    self.assertEqual(qualified.sql(dialect="bigquery"), expected_sql)

def test_annotate_object_construct(self):
sql = "SELECT OBJECT_CONSTRUCT('foo', 'bar', 'a b', 'c d') AS c"

Expand Down