diff --git a/sqlglot/optimizer/resolver.py b/sqlglot/optimizer/resolver.py
index 4668208b21..daa8dda6ed 100644
--- a/sqlglot/optimizer/resolver.py
+++ b/sqlglot/optimizer/resolver.py
@@ -305,6 +305,21 @@ def _get_unambiguous_columns(
             # Performance optimization - avoid copying first_columns if there is only one table.
             return SingleValuedMapping(first_columns, first_table)
 
+        # For UNNEST_COLUMN_ONLY dialects (e.g. BigQuery), map each UNNEST's original alias
+        # (alias.columns[0]) to its source name. The map is consulted below so that an
+        # UNNEST alias shadowing another table's column resolves to the UNNEST source.
+        unnest_original_aliases: t.Dict[str, str] = {}
+        if self.dialect.UNNEST_COLUMN_ONLY:
+            unnest_original_aliases = {
+                alias_arg.columns[0].name: source_name
+                for source_name, source in self.scope.sources.items()
+                if (
+                    isinstance(source.expression, exp.Unnest)
+                    and (alias_arg := source.expression.args.get("alias"))
+                    and alias_arg.columns
+                )
+            }
+
         unambiguous_columns = {col: first_table for col in first_columns}
         all_columns = set(unambiguous_columns)
 
@@ -314,6 +329,10 @@ def _get_unambiguous_columns(
             all_columns.update(columns)
 
             for column in ambiguous:
+                if column in unnest_original_aliases:
+                    unambiguous_columns[column] = unnest_original_aliases[column]
+                    continue
+
                 unambiguous_columns.pop(column, None)
             for column in unique.difference(ambiguous):
                 unambiguous_columns[column] = table
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
index 3fb5df4d18..ded88dd417 100644
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -1917,6 +1917,30 @@ def test_struct_annotation_bigquery(self):
 
         assert annotated.selects[0].type == exp.DataType.build("VARCHAR")
 
+    def test_bigquery_unnest_alias_shadowing(self):
+        """A BigQuery UNNEST alias must shadow a same-named column of a joined table."""
+        sql = """
+        SELECT timeline_date
+        FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03')) AS timeline_date
+        LEFT JOIN production_tier ON production_tier.timeline_date = timeline_date
+        """
+        schema = {"production_tier": {"timeline_date": "DATE", "id": "INT"}}
+
+        result = optimizer.qualify.qualify(
+            parse_one(sql, dialect="bigquery"),
+            schema=schema,
+            dialect="bigquery",
+        )
+
+        result_sql = result.sql(dialect="bigquery")
+        self.assertEqual(
+            result_sql,
+            "SELECT `timeline_date` AS `timeline_date` "
+            "FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03', INTERVAL '1' DAY)) AS `timeline_date` "
+            "LEFT JOIN `production_tier` AS `production_tier` "
+            "ON `production_tier`.`timeline_date` = `timeline_date`",
+        )
+
     def test_annotate_object_construct(self):
         sql = "SELECT OBJECT_CONSTRUCT('foo', 'bar', 'a b', 'c d') AS c"
 