From ab27d2cd9b7c8c091a450e1918d0fe6e3f71a724 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Tue, 6 Jan 2026 10:45:37 -0500 Subject: [PATCH 1/6] fix(optimizer)!: correctly resolve unnest alias shadowing for BigQuery --- sqlglot/optimizer/qualify_tables.py | 3 ++- sqlglot/optimizer/resolver.py | 24 +++++++++++++++++++++++- tests/test_optimizer.py | 22 ++++++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index 7f4a5a6897..cd19fb22cd 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -174,7 +174,8 @@ def _set_alias( if on_qualify: on_qualify(source) elif isinstance(source, Scope) and source.is_udtf: - _set_alias(udtf := source.expression, canonical_aliases) + udtf = source.expression + _set_alias(udtf, canonical_aliases) table_alias = udtf.args["alias"] diff --git a/sqlglot/optimizer/resolver.py b/sqlglot/optimizer/resolver.py index 4668208b21..c1f9e5ea76 100644 --- a/sqlglot/optimizer/resolver.py +++ b/sqlglot/optimizer/resolver.py @@ -78,7 +78,12 @@ def get_table(self, column: str | exp.Column) -> t.Optional[exp.Identifier]: node_alias = node.args.get("alias") if node_alias: - return exp.to_identifier(node_alias.this) + # For BigQuery UNNEST with UNNEST_COLUMN_ONLY, alias.this is None but + # the alias name is stored in alias.columns[0] + alias_name = node_alias.this + if alias_name is None and len(node_alias.columns) == 1: + alias_name = node_alias.columns[0] + return exp.to_identifier(alias_name) return exp.to_identifier(table_name) @@ -305,6 +310,18 @@ def _get_unambiguous_columns( # Performance optimization - avoid copying first_columns if there is only one table. return SingleValuedMapping(first_columns, first_table) + # For BigQuery (UNNEST_COLUMN_ONLY), build a mapping of original UNNEST aliases + # (from alias.columns[0]) to their source names. This is used to resolve shadowing + # where an UNNEST alias shadows a column name from another table. + unnest_original_aliases: t.Dict[str, str] = {} + if self.dialect.UNNEST_COLUMN_ONLY: + for source_name, source in self.scope.sources.items(): + if isinstance(source, Scope) and isinstance(source.expression, exp.Unnest): + alias_arg = source.expression.args.get("alias") + if alias_arg and alias_arg.columns: + original_alias = alias_arg.columns[0].name + unnest_original_aliases[original_alias] = source_name + unambiguous_columns = {col: first_table for col in first_columns} all_columns = set(unambiguous_columns) @@ -314,6 +331,11 @@ def _get_unambiguous_columns( all_columns.update(columns) for column in ambiguous: + # In BigQuery (UNNEST_COLUMN_ONLY), source/table aliases shadow column names. + # If the column name matches an UNNEST's original alias, map it to that source. + if column in unnest_original_aliases: + unambiguous_columns[column] = unnest_original_aliases[column] + continue unambiguous_columns.pop(column, None) for column in unique.difference(ambiguous): unambiguous_columns[column] = table diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 3fb5df4d18..ca10678f5b 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -1917,6 +1917,28 @@ def test_struct_annotation_bigquery(self): assert annotated.selects[0].type == exp.DataType.build("VARCHAR") + def test_bigquery_unnest_alias_shadowing(self): + """Test that BigQuery UNNEST table alias shadows column names from other tables.""" + sql = """ + SELECT timeline_date + FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03')) AS timeline_date + LEFT JOIN production_tier ON production_tier.timeline_date = timeline_date + """ + schema = {"production_tier": {"timeline_date": "DATE", "id": "INT"}} + + # This should not raise an ambiguous column error + result = optimizer.qualify.qualify( + parse_one(sql, dialect="bigquery"), + schema=schema, + dialect="bigquery", + ) + + result_sql = result.sql(dialect="bigquery") + # Verify the UNNEST alias is preserved as 'timeline_date' + self.assertIn("AS `timeline_date`", result_sql) + # Verify the unqualified timeline_date in SELECT is NOT qualified to production_tier + self.assertNotIn("`production_tier`.`timeline_date` AS", result_sql) + def test_annotate_object_construct(self): sql = "SELECT OBJECT_CONSTRUCT('foo', 'bar', 'a b', 'c d') AS c" From ca7058a5e7b6efe20b25a6dbb5184e69f9378891 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Tue, 6 Jan 2026 12:15:16 -0500 Subject: [PATCH 2/6] minor cleanup --- sqlglot/optimizer/resolver.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/sqlglot/optimizer/resolver.py b/sqlglot/optimizer/resolver.py index c1f9e5ea76..c08df34753 100644 --- a/sqlglot/optimizer/resolver.py +++ b/sqlglot/optimizer/resolver.py @@ -78,10 +78,14 @@ def get_table(self, column: str | exp.Column) -> t.Optional[exp.Identifier]: node_alias = node.args.get("alias") if node_alias: - # For BigQuery UNNEST with UNNEST_COLUMN_ONLY, alias.this is None but - # the alias name is stored in alias.columns[0] alias_name = node_alias.this - if alias_name is None and len(node_alias.columns) == 1: + if ( + not alias_name + and isinstance(node_alias, exp.TableAlias) + and len(node_alias.columns) == 1 + ): + # For BigQuery UNNEST_COLUMN_ONLY, alias.this is None and + # the actual alias is stored in alias.columns[0] alias_name = node_alias.columns[0] return exp.to_identifier(alias_name) @@ -310,17 +314,19 @@ def _get_unambiguous_columns( # Performance optimization - avoid copying first_columns if there is only one table. return SingleValuedMapping(first_columns, first_table) - # For BigQuery (UNNEST_COLUMN_ONLY), build a mapping of original UNNEST aliases - # (from alias.columns[0]) to their source names. This is used to resolve shadowing + # For BigQuery UNNEST_COLUMN_ONLY, build a mapping of original UNNEST aliases + # from alias.columns[0] to their source names. This is used to resolve shadowing # where an UNNEST alias shadows a column name from another table. - unnest_original_aliases: t.Dict[str, str] = {} - if self.dialect.UNNEST_COLUMN_ONLY: - for source_name, source in self.scope.sources.items(): - if isinstance(source, Scope) and isinstance(source.expression, exp.Unnest): - alias_arg = source.expression.args.get("alias") - if alias_arg and alias_arg.columns: - original_alias = alias_arg.columns[0].name - unnest_original_aliases[original_alias] = source_name + unnest_original_aliases: t.Dict[str, str] = { + source.expression.args["alias"].columns[0].name: source_name + for source_name, source in self.scope.sources.items() + if ( + self.dialect.UNNEST_COLUMN_ONLY + and isinstance(source.expression, exp.Unnest) + and (alias_arg := source.expression.args.get("alias")) + and alias_arg.columns + ) + } unambiguous_columns = {col: first_table for col in first_columns} all_columns = set(unambiguous_columns) @@ -331,11 +337,10 @@ def _get_unambiguous_columns( all_columns.update(columns) for column in ambiguous: - # In BigQuery (UNNEST_COLUMN_ONLY), source/table aliases shadow column names. - # If the column name matches an UNNEST's original alias, map it to that source. if column in unnest_original_aliases: unambiguous_columns[column] = unnest_original_aliases[column] continue + unambiguous_columns.pop(column, None) for column in unique.difference(ambiguous): unambiguous_columns[column] = table From 3bc491d1028c0a583732aab68b13c42ae22dfd66 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Tue, 6 Jan 2026 12:16:57 -0500 Subject: [PATCH 3/6] revert change to qualify_tables --- sqlglot/optimizer/qualify_tables.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index cd19fb22cd..7f4a5a6897 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -174,8 +174,7 @@ def _set_alias( if on_qualify: on_qualify(source) elif isinstance(source, Scope) and source.is_udtf: - udtf = source.expression - _set_alias(udtf, canonical_aliases) + _set_alias(udtf := source.expression, canonical_aliases) table_alias = udtf.args["alias"] From b6bc290c06bcd3179d32f47083c489d92cbacb70 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 8 Jan 2026 11:40:56 -0500 Subject: [PATCH 4/6] address PR comments --- sqlglot/optimizer/resolver.py | 11 +---------- tests/test_optimizer.py | 11 +++++++---- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/sqlglot/optimizer/resolver.py b/sqlglot/optimizer/resolver.py index c08df34753..68db0b1f98 100644 --- a/sqlglot/optimizer/resolver.py +++ b/sqlglot/optimizer/resolver.py @@ -78,16 +78,7 @@ def get_table(self, column: str | exp.Column) -> t.Optional[exp.Identifier]: node_alias = node.args.get("alias") if node_alias: - alias_name = node_alias.this - if ( - not alias_name - and isinstance(node_alias, exp.TableAlias) - and len(node_alias.columns) == 1 - ): - # For BigQuery UNNEST_COLUMN_ONLY, alias.this is None and - # the actual alias is stored in alias.columns[0] - alias_name = node_alias.columns[0] - return exp.to_identifier(alias_name) + return exp.to_identifier(node_alias.this) return exp.to_identifier(table_name) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index ca10678f5b..c45eaa1bfb 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -1934,10 +1934,13 @@ def test_bigquery_unnest_alias_shadowing(self): ) result_sql = result.sql(dialect="bigquery") - # Verify the UNNEST alias is preserved as 'timeline_date' - self.assertIn("AS `timeline_date`", result_sql) - # Verify the unqualified timeline_date in SELECT is NOT qualified to production_tier - self.assertNotIn("`production_tier`.`timeline_date` AS", result_sql) + self.assertEqual( + result_sql, + "SELECT `timeline_date` AS `timeline_date` " + "FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03', INTERVAL '1' DAY)) AS `timeline_date` " + "LEFT JOIN `production_tier` AS `production_tier` " + "ON `production_tier`.`timeline_date` = `timeline_date`", + ) def test_annotate_object_construct(self): sql = "SELECT OBJECT_CONSTRUCT('foo', 'bar', 'a b', 'c d') AS c" From 8be7feac5548f0c6c4148c5845c5fdcb7fca7617 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 8 Jan 2026 11:42:39 -0500 Subject: [PATCH 5/6] remove comment --- tests/test_optimizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index c45eaa1bfb..ded88dd417 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -1926,7 +1926,6 @@ def test_bigquery_unnest_alias_shadowing(self): """ schema = {"production_tier": {"timeline_date": "DATE", "id": "INT"}} - # This should not raise an ambiguous column error result = optimizer.qualify.qualify( parse_one(sql, dialect="bigquery"), schema=schema, From bbc6e03e2ae30dcae46073b103dc544878561823 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Fri, 9 Jan 2026 09:11:09 -0500 Subject: [PATCH 6/6] address PR comments --- sqlglot/optimizer/resolver.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sqlglot/optimizer/resolver.py b/sqlglot/optimizer/resolver.py index 68db0b1f98..daa8dda6ed 100644 --- a/sqlglot/optimizer/resolver.py +++ b/sqlglot/optimizer/resolver.py @@ -308,16 +308,17 @@ def _get_unambiguous_columns( # For BigQuery UNNEST_COLUMN_ONLY, build a mapping of original UNNEST aliases # from alias.columns[0] to their source names. This is used to resolve shadowing # where an UNNEST alias shadows a column name from another table. - unnest_original_aliases: t.Dict[str, str] = { - source.expression.args["alias"].columns[0].name: source_name - for source_name, source in self.scope.sources.items() - if ( - self.dialect.UNNEST_COLUMN_ONLY - and isinstance(source.expression, exp.Unnest) - and (alias_arg := source.expression.args.get("alias")) - and alias_arg.columns - ) - } + unnest_original_aliases: t.Dict[str, str] = {} + if self.dialect.UNNEST_COLUMN_ONLY: + unnest_original_aliases = { + alias_arg.columns[0].name: source_name + for source_name, source in self.scope.sources.items() + if ( + isinstance(source.expression, exp.Unnest) + and (alias_arg := source.expression.args.get("alias")) + and alias_arg.columns + ) + } unambiguous_columns = {col: first_table for col in first_columns} all_columns = set(unambiguous_columns)