From dbbc3753ab603ec493b7775e36dbfef9d10258ef Mon Sep 17 00:00:00 2001 From: George Sittas Date: Thu, 20 Nov 2025 15:27:38 +0200 Subject: [PATCH 1/7] Feat(optimizer): canonicalize table aliases --- sqlglot/optimizer/qualify.py | 4 + sqlglot/optimizer/qualify_tables.py | 113 +++++++++++++++++----------- tests/test_optimizer.py | 71 +++++++++++++++++ 3 files changed, 142 insertions(+), 46 deletions(-) diff --git a/sqlglot/optimizer/qualify.py b/sqlglot/optimizer/qualify.py index 4af67f15fe..d7bf7aa9b7 100644 --- a/sqlglot/optimizer/qualify.py +++ b/sqlglot/optimizer/qualify.py @@ -30,6 +30,7 @@ def qualify( validate_qualify_columns: bool = True, quote_identifiers: bool = True, identify: bool = True, + canonicalize_table_aliases: bool = False, on_qualify: t.Optional[t.Callable[[exp.Expression], None]] = None, ) -> exp.Expression: """ @@ -62,6 +63,8 @@ def qualify( This step is necessary to ensure correctness for case sensitive queries. But this flag is provided in case this step is performed at a later time. identify: If True, quote all identifiers, else only necessary ones. + canonicalize_table_aliases: Whether to use canonical aliases (_0, _1, ...) for all sources + instead of preserving table names. on_qualify: Callback after a table has been qualified. 
Returns: @@ -81,6 +84,7 @@ def qualify( catalog=catalog, dialect=dialect, on_qualify=on_qualify, + canonicalize=canonicalize_table_aliases, ) if isolate_tables: diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index 3e9935149a..a8d543347f 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -4,7 +4,7 @@ from sqlglot import exp from sqlglot.dialects.dialect import Dialect, DialectType -from sqlglot.helper import name_sequence +from sqlglot.helper import name_sequence, seq_get from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlglot.optimizer.scope import Scope, traverse_scope @@ -18,6 +18,7 @@ def qualify_tables( catalog: t.Optional[str | exp.Identifier] = None, on_qualify: t.Optional[t.Callable[[exp.Expression], None]] = None, dialect: DialectType = None, + canonicalize: bool = False, ) -> E: """ Rewrite sqlglot AST to have fully qualified tables. Join constructs such as @@ -39,13 +40,15 @@ def qualify_tables( catalog: Catalog name on_qualify: Callback after a table has been qualified. dialect: The dialect to parse catalog and schema into. + canonicalize: Whether to use canonical aliases (_0, _1, ...) for all sources + instead of preserving table names. Defaults to False. Returns: The qualified expression. 
""" dialect = Dialect.get_or_raise(dialect) - alias_sequence = name_sequence("_q_") + alias_sequence = name_sequence("_" if canonicalize else "_q_") def next_alias_name() -> str: return normalize_identifiers(alias_sequence(), dialect=dialect).name @@ -74,6 +77,32 @@ def _qualify(table: exp.Table) -> None: if isinstance(node, exp.Table) and node.name not in cte_names: _qualify(node) + canonical_aliases: t.Dict[str, str] = {} + + def _set_alias( + expression: exp.Expression, + target_alias: t.Optional[str] = None, + scope: t.Optional[Scope] = None, + normalize: bool = False, + ) -> None: + alias = expression.args.get("alias") or exp.TableAlias() + + if canonicalize: + new_alias_name = next_alias_name() + canonical_aliases[alias.name or target_alias or ""] = new_alias_name + elif not alias.name: + new_alias_name = target_alias or next_alias_name() + if normalize: + new_alias_name = normalize_identifiers(new_alias_name, dialect=dialect).name + else: + return + + alias.set("this", exp.to_identifier(new_alias_name)) + expression.set("alias", alias) + + if scope: + scope.rename_source(None, new_alias_name) + for scope in traverse_scope(expression): for query in scope.subqueries: subquery = query.parent @@ -88,61 +117,43 @@ def _qualify(table: exp.Table) -> None: derived_table.this.replace(exp.select("*").from_(unnested.copy(), copy=False)) derived_table.this.set("joins", joins) - if not derived_table.args.get("alias"): - alias = next_alias_name() - derived_table.set("alias", exp.TableAlias(this=exp.to_identifier(alias))) - scope.rename_source(None, alias) - - pivots = derived_table.args.get("pivots") - if pivots and not pivots[0].alias: - pivots[0].set("alias", exp.TableAlias(this=exp.to_identifier(next_alias_name()))) + _set_alias(derived_table, scope=scope) + if pivot := seq_get(derived_table.args.get("pivots") or [], 0): + _set_alias(pivot) table_aliases = {} for name, source in scope.sources.items(): if isinstance(source, exp.Table): - pivots = 
source.args.get("pivots") - if not source.alias: - # Don't add the pivot's alias to the pivoted table, use the table's name instead - if pivots and pivots[0].alias == name: - name = source.name - - # Mutates the source by attaching an alias to it - normalized_alias = normalize_identifiers( - name or source.name or alias_sequence(), dialect=dialect - ) - exp.alias_(source, normalized_alias, copy=False, table=True) - - table_aliases[".".join(p.name for p in source.parts)] = exp.to_identifier( - source.alias - ) - - if pivots: - pivot = pivots[0] - if not pivot.alias: - pivot_alias = normalize_identifiers( - source.alias if pivot.unpivot else alias_sequence(), - dialect=dialect, - ) - pivot.set("alias", exp.TableAlias(this=exp.to_identifier(pivot_alias))) + # When the name is empty, it means that we have a non-table source, e.g. a pivoted Cte + is_real_table_source = bool(name) + + if pivot := seq_get(source.args.get("pivots") or [], 0): + name = source.name + + _set_alias(source, target_alias=name or source.name or None, normalize=True) + + source_fqn = ".".join(p.name for p in source.parts) + table_aliases[source_fqn] = exp.to_identifier(source.alias) + + if pivot: + target_alias = source.alias if pivot.unpivot else None + _set_alias(pivot, target_alias=target_alias, normalize=True) # This case corresponds to a pivoted CTE, we don't want to qualify that if isinstance(scope.sources.get(source.alias_or_name), Scope): continue - _qualify(source) + if is_real_table_source: + _qualify(source) - if on_qualify: - on_qualify(source) + if on_qualify: + on_qualify(source) elif isinstance(source, Scope) and source.is_udtf: - udtf = source.expression - table_alias = udtf.args.get("alias") or exp.TableAlias( - this=exp.to_identifier(next_alias_name()) - ) - udtf.set("alias", table_alias) - - if not table_alias.name: - table_alias.set("this", exp.to_identifier(next_alias_name())) + _set_alias(udtf := source.expression) + + table_alias = udtf.args["alias"] + if isinstance(udtf, 
exp.Values) and not table_alias.columns: column_aliases = [ normalize_identifiers(i, dialect=dialect) @@ -152,9 +163,11 @@ def _qualify(table: exp.Table) -> None: for table in scope.tables: if not table.alias and isinstance(table.parent, (exp.From, exp.Join)): - exp.alias_(table, table.name, copy=False, table=True) + _set_alias(table, target_alias=table.name) for column in scope.columns: + table = column.table + if column.db: table_alias = table_aliases.get(".".join(p.name for p in column.parts[0:-1])) @@ -163,5 +176,13 @@ def _qualify(table: exp.Table) -> None: column.set(p, None) column.set("table", table_alias.copy()) + elif ( + canonical_aliases + and table + and (canonical_table := canonical_aliases.get(table, "")) != column.table + ): + # Amend existing aliases, e.g. t.c -> _0.c if t is aliased to _0 + column.set("table", exp.to_identifier(canonical_table)) + pass return expression diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index da525b8245..a6c2d2f454 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -240,6 +240,77 @@ def test_isolate_table_selects(self): ) def test_qualify_tables(self): + self.assertEqual( + optimizer.qualify_tables.qualify_tables( + parse_one("SELECT * FROM t"), + db="db", + catalog="c", + canonicalize=True, + ).sql(), + "SELECT * FROM c.db.t AS _0", + ) + + self.assertEqual( + optimizer.qualify_tables.qualify_tables( + parse_one("SELECT * FROM t1 JOIN t2 ON t1.id = t2.id"), + db="db", + catalog="c", + canonicalize=True, + ).sql(), + "SELECT * FROM c.db.t1 AS _0 JOIN c.db.t2 AS _1 ON _0.id = _1.id", + ) + + self.assertEqual( + optimizer.qualify_tables.qualify_tables( + parse_one("SELECT * FROM db1.users JOIN db2.users ON db1.users.id = db2.users.id"), + catalog="c", + canonicalize=True, + ).sql(), + "SELECT * FROM c.db1.users AS _0 JOIN c.db2.users AS _1 ON _0.id = _1.id", + ) + + self.assertEqual( + optimizer.qualify_tables.qualify_tables( + parse_one("WITH cte AS (SELECT * FROM t) SELECT * FROM 
cte"), + db="db", + catalog="c", + canonicalize=True, + ).sql(), + "WITH cte AS (SELECT * FROM c.db.t AS _0) SELECT * FROM cte AS _1", + ) + + self.assertEqual( + optimizer.qualify_tables.qualify_tables( + parse_one("SELECT * FROM (SELECT * FROM t)"), + db="db", + catalog="c", + canonicalize=True, + ).sql(), + "SELECT * FROM (SELECT * FROM c.db.t AS _0) AS _1", + ) + + self.assertEqual( + optimizer.qualify_tables.qualify_tables( + parse_one("SELECT * FROM t1, (SELECT * FROM t2) AS sub, t3"), + db="db", + catalog="c", + canonicalize=True, + ).sql(), + "SELECT * FROM c.db.t1 AS _2, (SELECT * FROM c.db.t2 AS _0) AS _1, c.db.t3 AS _3", + ) + + self.assertEqual( + optimizer.qualify_tables.qualify_tables( + parse_one( + "WITH cte AS (SELECT * FROM t) SELECT * FROM cte PIVOT(SUM(c) FOR v IN ('x', 'y'))" + ), + db="db", + catalog="c", + canonicalize=True, + ).sql(), + "WITH cte AS (SELECT * FROM c.db.t AS _0) SELECT * FROM cte AS _1 PIVOT(SUM(c) FOR v IN ('x', 'y')) AS _2", + ) + self.assertEqual( optimizer.qualify.qualify( parse_one("WITH tesT AS (SELECT * FROM t1) SELECT * FROM test", "bigquery"), From 7bb18a45a628a2f61c9041373295b85ee1233403 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Fri, 21 Nov 2025 16:52:03 +0200 Subject: [PATCH 2/7] Change _q_ alias sequence to plain _ --- sqlglot/optimizer/qualify.py | 2 +- sqlglot/optimizer/qualify_tables.py | 16 +-- tests/fixtures/optimizer/optimizer.sql | 128 +++++++++--------- .../optimizer/pushdown_projections.sql | 34 ++--- tests/fixtures/optimizer/qualify_columns.sql | 78 +++++------ .../qualify_columns__with_invisible.sql | 8 +- .../optimizer/qualify_columns_ddl.sql | 6 +- tests/fixtures/optimizer/qualify_tables.sql | 24 ++-- tests/fixtures/optimizer/tpc-ds/tpc-ds.sql | 22 +-- tests/test_lineage.py | 12 +- tests/test_optimizer.py | 34 ++--- 11 files changed, 181 insertions(+), 183 deletions(-) diff --git a/sqlglot/optimizer/qualify.py b/sqlglot/optimizer/qualify.py index d7bf7aa9b7..c3aec69399 100644 --- 
a/sqlglot/optimizer/qualify.py +++ b/sqlglot/optimizer/qualify.py @@ -84,7 +84,7 @@ def qualify( catalog=catalog, dialect=dialect, on_qualify=on_qualify, - canonicalize=canonicalize_table_aliases, + canonicalize_table_aliases=canonicalize_table_aliases, ) if isolate_tables: diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index a8d543347f..aa38c6cd0c 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -18,7 +18,7 @@ def qualify_tables( catalog: t.Optional[str | exp.Identifier] = None, on_qualify: t.Optional[t.Callable[[exp.Expression], None]] = None, dialect: DialectType = None, - canonicalize: bool = False, + canonicalize_table_aliases: bool = False, ) -> E: """ Rewrite sqlglot AST to have fully qualified tables. Join constructs such as @@ -40,18 +40,14 @@ def qualify_tables( catalog: Catalog name on_qualify: Callback after a table has been qualified. dialect: The dialect to parse catalog and schema into. - canonicalize: Whether to use canonical aliases (_0, _1, ...) for all sources + canonicalize_table_aliases: Whether to use canonical aliases (_0, _1, ...) for all sources instead of preserving table names. Defaults to False. Returns: The qualified expression. 
""" dialect = Dialect.get_or_raise(dialect) - - alias_sequence = name_sequence("_" if canonicalize else "_q_") - - def next_alias_name() -> str: - return normalize_identifiers(alias_sequence(), dialect=dialect).name + next_alias_name = name_sequence("_") if db := db or None: db = exp.parse_identifier(db, dialect=dialect) @@ -87,17 +83,19 @@ def _set_alias( ) -> None: alias = expression.args.get("alias") or exp.TableAlias() - if canonicalize: + if canonicalize_table_aliases: + quoted: t.Optional[bool] = True new_alias_name = next_alias_name() canonical_aliases[alias.name or target_alias or ""] = new_alias_name elif not alias.name: + quoted = None if target_alias else True new_alias_name = target_alias or next_alias_name() if normalize: new_alias_name = normalize_identifiers(new_alias_name, dialect=dialect).name else: return - alias.set("this", exp.to_identifier(new_alias_name)) + alias.set("this", exp.to_identifier(new_alias_name, quoted=quoted)) expression.set("alias", alias) if scope: diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 88dff813a2..918983a788 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -33,7 +33,7 @@ FROM ( WHERE object_pointstext IS NOT NULL ); CREATE OR REPLACE TEMPORARY VIEW `latest_boo` AS -WITH `_q_1` AS ( +WITH `_1` AS ( SELECT EXPLODE_OUTER(SPLIT(`boo`.`object_pointstext`, ',')) AS `points` FROM `boo` AS `boo` @@ -41,9 +41,9 @@ WITH `_q_1` AS ( NOT `boo`.`object_pointstext` IS NULL ) SELECT - TRIM(SPLIT(`_q_1`.`points`, ':')[0]) AS `points_type`, - TRIM(SPLIT(`_q_1`.`points`, ':')[1]) AS `points_value` -FROM `_q_1` AS `_q_1`; + TRIM(SPLIT(`_1`.`points`, ':')[0]) AS `points_type`, + TRIM(SPLIT(`_1`.`points`, ':')[1]) AS `points_value` +FROM `_1` AS `_1`; # title: Union in CTE WITH cte AS ( @@ -545,52 +545,52 @@ QUALIFY # execute: false SELECT * FROM (SELECT a, b, c FROM sc.tb) PIVOT (SUM(c) FOR b IN ('x','y','z')); SELECT - "_q_1"."a" AS 
"a", - "_q_1"."x" AS "x", - "_q_1"."y" AS "y", - "_q_1"."z" AS "z" + "_1"."a" AS "a", + "_1"."x" AS "x", + "_1"."y" AS "y", + "_1"."z" AS "z" FROM ( SELECT "tb"."a" AS "a", "tb"."b" AS "b", "tb"."c" AS "c" FROM "sc"."tb" AS "tb" -) AS "_q_0" -PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1"; +) AS "_0" +PIVOT(SUM("_0"."c") FOR "_0"."b" IN ('x', 'y', 'z')) AS "_1"; # title: pivoted source with explicit selections where one of them is excluded & selected at the same time # note: we need to respect the exclude when selecting * from pivoted source and not include the computed column twice # execute: false SELECT * EXCEPT (x), CAST(x AS TEXT) AS x FROM (SELECT a, b, c FROM sc.tb) PIVOT (SUM(c) FOR b IN ('x','y','z')); SELECT - "_q_1"."a" AS "a", - "_q_1"."y" AS "y", - "_q_1"."z" AS "z", - CAST("_q_1"."x" AS TEXT) AS "x" + "_1"."a" AS "a", + "_1"."y" AS "y", + "_1"."z" AS "z", + CAST("_1"."x" AS TEXT) AS "x" FROM ( SELECT "tb"."a" AS "a", "tb"."b" AS "b", "tb"."c" AS "c" FROM "sc"."tb" AS "tb" -) AS "_q_0" -PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1"; +) AS "_0" +PIVOT(SUM("_0"."c") FOR "_0"."b" IN ('x', 'y', 'z')) AS "_1"; # title: pivoted source with implicit selections # execute: false SELECT * FROM (SELECT * FROM u) PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT - "_q_1"."g" AS "g", - "_q_1"."x" AS "x", - "_q_1"."y" AS "y" + "_1"."g" AS "g", + "_1"."x" AS "x", + "_1"."y" AS "y" FROM ( SELECT "u"."f" AS "f", "u"."g" AS "g", "u"."h" AS "h" FROM "u" AS "u" -) AS "_q_0" -PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "_q_1"; +) AS "_0" +PIVOT(SUM("_0"."f") FOR "_0"."h" IN ('x', 'y')) AS "_1"; # title: selecting explicit qualified columns from pivoted source with explicit selections # execute: false @@ -603,17 +603,17 @@ FROM ( "u"."f" AS "f", "u"."h" AS "h" FROM "u" AS "u" -) AS "_q_0" -PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "piv"; +) AS "_0" +PIVOT(SUM("_0"."f") FOR "_0"."h" IN ('x', 'y')) AS "piv"; # title: 
selecting explicit unqualified columns from pivoted source with implicit selections # execute: false SELECT x, y FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT - "_q_0"."x" AS "x", - "_q_0"."y" AS "y" + "_0"."x" AS "x", + "_0"."y" AS "y" FROM "u" AS "u" -PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; +PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_0"; # title: selecting all columns from a pivoted CTE source, using alias for the aggregation and generating bigquery # execute: false @@ -627,22 +627,22 @@ WITH `u_cte` AS ( FROM `u` AS `u` ) SELECT - `_q_0`.`g` AS `g`, - `_q_0`.`sum_x` AS `sum_x`, - `_q_0`.`sum_y` AS `sum_y` + `_0`.`g` AS `g`, + `_0`.`sum_x` AS `sum_x`, + `_0`.`sum_y` AS `sum_y` FROM `u_cte` AS `u_cte` -PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_q_0`; +PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_0`; # title: selecting all columns from a pivoted source and generating snowflake # execute: false # dialect: snowflake SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT - "_Q_0"."G" AS "G", - "_Q_0"."'x'" AS "'x'", - "_Q_0"."'y'" AS "'y'" + "_0"."G" AS "G", + "_0"."'x'" AS "'x'", + "_0"."'y'" AS "'y'" FROM "U" AS "U" -PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_Q_0"; +PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_0"; # title: selecting all columns from a pivoted source and generating spark # note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`) @@ -650,15 +650,15 @@ PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_Q_0"; # dialect: spark SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT - `_q_0`.`g` AS `g`, - `_q_0`.`x` AS `x`, - `_q_0`.`y` AS `y` + `_0`.`g` AS `g`, + `_0`.`x` AS `x`, + `_0`.`y` AS `y` FROM ( SELECT * FROM `u` AS `u` PIVOT(SUM(`u`.`f`) FOR `h` IN ('x', 'y')) -) AS `_q_0`; +) AS `_0`; # title: selecting all columns from a pivoted source, pivot has column aliases # execute: false @@ -730,10 +730,10 @@ 
UNPIVOT("sales" FOR "month" IN ("m_sales"."jan", "m_sales"."feb")) AS "unpiv"("a # dialect: snowflake SELECT * FROM (SELECT * FROM m_sales) AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) ORDER BY empid; SELECT - "_Q_0"."EMPID" AS "EMPID", - "_Q_0"."DEPT" AS "DEPT", - "_Q_0"."MONTH" AS "MONTH", - "_Q_0"."SALES" AS "SALES" + "_0"."EMPID" AS "EMPID", + "_0"."DEPT" AS "DEPT", + "_0"."MONTH" AS "MONTH", + "_0"."SALES" AS "SALES" FROM ( SELECT "M_SALES"."EMPID" AS "EMPID", @@ -742,9 +742,9 @@ FROM ( "M_SALES"."FEB" AS "FEB" FROM "M_SALES" AS "M_SALES" ) AS "M_SALES" -UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_Q_0" +UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_0" ORDER BY - "_Q_0"."EMPID"; + "_0"."EMPID"; # title: unpivoted table source with a single value column, unpivot columns can be qualified # execute: false @@ -987,7 +987,7 @@ LEFT JOIN ( # title: select * from wrapped subquery # execute: false SELECT * FROM ((SELECT * FROM tbl)); -WITH "_q_0" AS ( +WITH "_0" AS ( SELECT * FROM "tbl" AS "tbl" @@ -995,7 +995,7 @@ WITH "_q_0" AS ( SELECT * FROM ( - "_q_0" AS "_q_0" + "_0" AS "_0" ); # title: select * from wrapped subquery joined to a table (known schema) @@ -1014,7 +1014,7 @@ FROM ( # title: select * from wrapped subquery joined to a table (unknown schema) # execute: false SELECT * FROM ((SELECT c FROM t1) JOIN t2); -WITH "_q_0" AS ( +WITH "_0" AS ( SELECT "t1"."c" AS "c" FROM "t1" AS "t1" @@ -1022,7 +1022,7 @@ WITH "_q_0" AS ( SELECT * FROM ( - "_q_0" AS "_q_0" + "_0" AS "_0" CROSS JOIN "t2" AS "t2" ); @@ -1039,11 +1039,11 @@ FROM ( # title: select * from wrapped join of subqueries (unknown schema) # execute: false SELECT * FROM ((SELECT * FROM t1) JOIN (SELECT * FROM t2)); -WITH "_q_0" AS ( +WITH "_0" AS ( SELECT * FROM "t1" AS "t1" -), "_q_1" AS ( +), "_1" AS ( SELECT * FROM "t2" AS "t2" @@ -1051,8 +1051,8 @@ WITH "_q_0" AS ( SELECT * FROM ( - "_q_0" AS "_q_0" - CROSS JOIN "_q_1" AS "_q_1" + "_0" AS "_0" + CROSS JOIN "_1" 
AS "_1" ); # title: select * from wrapped join of subqueries (known schema) @@ -1211,17 +1211,17 @@ SELECT Name, (SELECT Name, explode(Fruits) as FruitStruct FROM fruits_table); -WITH `_q_0` AS ( +WITH `_0` AS ( SELECT `fruits_table`.`name` AS `name`, EXPLODE(`fruits_table`.`fruits`) AS `fruitstruct` FROM `fruits_table` AS `fruits_table` ) SELECT - `_q_0`.`name` AS `name`, - `_q_0`.`fruitstruct`.`$id` AS `$id`, - `_q_0`.`fruitstruct`.`value` AS `value` -FROM `_q_0` AS `_q_0`; + `_0`.`name` AS `name`, + `_0`.`fruitstruct`.`$id` AS `$id`, + `_0`.`fruitstruct`.`value` AS `value` +FROM `_0` AS `_0`; # title: mysql is case-sensitive by default # dialect: mysql @@ -1471,7 +1471,7 @@ WITH a AS (SELECT 'v' AS x) SELECT * FROM (SELECT TRANSFORM(x) USING 'cat' AS (y WITH `a` AS ( SELECT 'v' AS `x` -), `_q_0` AS ( +), `_0` AS ( SELECT TRANSFORM(`a`.`x`) USING 'cat' AS ( `y` STRING @@ -1479,8 +1479,8 @@ WITH `a` AS ( FROM `a` AS `a` ) SELECT - `_q_0`.`y` AS `y` -FROM `_q_0` AS `_q_0`; + `_0`.`y` AS `y` +FROM `_0` AS `_0`; # title: SELECT TRANSFORM ... 
Spark clause when schema is not provided # execute: false @@ -1489,15 +1489,15 @@ WITH a AS (SELECT 'v' AS x) SELECT * FROM (SELECT TRANSFORM(x) USING 'cat' FROM WITH `a` AS ( SELECT 'v' AS `x` -), `_q_0` AS ( +), `_0` AS ( SELECT TRANSFORM(`a`.`x`) USING 'cat' FROM `a` AS `a` ) SELECT - `_q_0`.`key` AS `key`, - `_q_0`.`value` AS `value` -FROM `_q_0` AS `_q_0`; + `_0`.`key` AS `key`, + `_0`.`value` AS `value` +FROM `_0` AS `_0`; # title: avoid reordering of non inner joins # execute: true diff --git a/tests/fixtures/optimizer/pushdown_projections.sql b/tests/fixtures/optimizer/pushdown_projections.sql index 03c42fb863..6921afa9f4 100644 --- a/tests/fixtures/optimizer/pushdown_projections.sql +++ b/tests/fixtures/optimizer/pushdown_projections.sql @@ -1,8 +1,8 @@ SELECT a FROM (SELECT * FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT "_0".a AS a FROM (SELECT x.a AS a FROM x AS x) AS "_0"; SELECT 1 FROM (SELECT * FROM x) WHERE b = 2; -SELECT 1 AS "1" FROM (SELECT x.b AS b FROM x AS x) AS _q_0 WHERE _q_0.b = 2; +SELECT 1 AS "1" FROM (SELECT x.b AS b FROM x AS x) AS "_0" WHERE "_0".b = 2; SELECT a, b, a from x; SELECT x.a AS a, x.b AS b, x.a AS a FROM x AS x; @@ -17,16 +17,16 @@ SELECT x1.a FROM (SELECT * FROM x) AS x1, (SELECT * FROM x) AS x2; SELECT x1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS x1, (SELECT 1 AS _ FROM x AS x) AS x2; SELECT a FROM (SELECT DISTINCT a, b FROM x); -SELECT _q_0.a AS a FROM (SELECT DISTINCT x.a AS a, x.b AS b FROM x AS x) AS _q_0; +SELECT "_0".a AS a FROM (SELECT DISTINCT x.a AS a, x.b AS b FROM x AS x) AS "_0"; SELECT a FROM (SELECT a, b FROM x UNION ALL SELECT a, b FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x UNION ALL SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT "_0".a AS a FROM (SELECT x.a AS a FROM x AS x UNION ALL SELECT x.a AS a FROM x AS x) AS "_0"; WITH t1 AS (SELECT x.a AS a, x.b AS b FROM x UNION ALL SELECT z.b AS b, z.c AS c FROM z) SELECT a, b FROM t1; WITH t1 AS (SELECT 
x.a AS a, x.b AS b FROM x AS x UNION ALL SELECT z.b AS b, z.c AS c FROM z AS z) SELECT t1.a AS a, t1.b AS b FROM t1 AS t1; SELECT a FROM (SELECT a, b FROM x UNION SELECT a, b FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x UNION SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0; +SELECT "_0".a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x UNION SELECT x.a AS a, x.b AS b FROM x AS x) AS "_0"; WITH y AS (SELECT * FROM x) SELECT a FROM y; WITH y AS (SELECT x.a AS a FROM x AS x) SELECT y.a AS a FROM y AS y; @@ -38,10 +38,10 @@ WITH z AS (SELECT * FROM x) SELECT a FROM z UNION SELECT a FROM z; WITH z AS (SELECT x.a AS a FROM x AS x) SELECT z.a AS a FROM z AS z UNION SELECT z.a AS a FROM z AS z; SELECT b FROM (SELECT a, SUM(b) AS b FROM x GROUP BY a); -SELECT _q_0.b AS b FROM (SELECT SUM(x.b) AS b FROM x AS x GROUP BY x.a) AS _q_0; +SELECT "_0".b AS b FROM (SELECT SUM(x.b) AS b FROM x AS x GROUP BY x.a) AS "_0"; SELECT b FROM (SELECT a, SUM(b) AS b FROM x ORDER BY a); -SELECT _q_0.b AS b FROM (SELECT x.a AS a, SUM(x.b) AS b FROM x AS x ORDER BY a) AS _q_0; +SELECT "_0".b AS b FROM (SELECT x.a AS a, SUM(x.b) AS b FROM x AS x ORDER BY a) AS "_0"; SELECT x FROM (VALUES(1, 2)) AS q(x, y); SELECT q.x AS x FROM (VALUES (1, 2)) AS q(x, y); @@ -56,7 +56,7 @@ SELECT x FROM VALUES(1, 2) AS q(x, y); SELECT q.x AS x FROM (VALUES (1, 2)) AS q(x, y); SELECT i.a FROM x AS i LEFT JOIN (SELECT a, b FROM (SELECT a, b FROM x)) AS j ON i.a = j.a; -SELECT i.a AS a FROM x AS i LEFT JOIN (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) AS j ON i.a = j.a; +SELECT i.a AS a FROM x AS i LEFT JOIN (SELECT "_0".a AS a FROM (SELECT x.a AS a FROM x AS x) AS "_0") AS j ON i.a = j.a; WITH cte AS (SELECT source.a AS a, ROW_NUMBER() OVER (PARTITION BY source.id, source.timestamp ORDER BY source.a DESC) AS index FROM source AS source QUALIFY index) SELECT cte.a AS a FROM cte; WITH cte AS (SELECT source.a AS a FROM source AS source QUALIFY ROW_NUMBER() OVER (PARTITION 
BY source.id, source.timestamp ORDER BY source.a DESC)) SELECT cte.a AS a FROM cte AS cte; @@ -87,31 +87,31 @@ WITH cte AS (SELECT t.col AS col FROM t AS t) SELECT CASE WHEN 1 IN (SELECT UNNE -------------------------------------- SELECT a FROM (SELECT * FROM zz) WHERE b = 1; -SELECT _q_0.a AS a FROM (SELECT zz.a AS a, zz.b AS b FROM zz AS zz) AS _q_0 WHERE _q_0.b = 1; +SELECT "_0".a AS a FROM (SELECT zz.a AS a, zz.b AS b FROM zz AS zz) AS "_0" WHERE "_0".b = 1; SELECT a FROM (SELECT * FROM aa UNION ALL SELECT * FROM bb UNION ALL SELECT * from cc); -SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS _q_0; +SELECT "_0".a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS "_0"; SELECT a FROM (SELECT a FROM aa UNION ALL SELECT * FROM bb UNION ALL SELECT * from cc); -SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS _q_0; +SELECT "_0".a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS "_0"; SELECT a FROM (SELECT * FROM aa CROSS JOIN bb); -SELECT _q_0.a AS a FROM (SELECT a AS a FROM aa AS aa CROSS JOIN bb AS bb) AS _q_0; +SELECT "_0".a AS a FROM (SELECT a AS a FROM aa AS aa CROSS JOIN bb AS bb) AS "_0"; SELECT a FROM (SELECT aa.* FROM aa); -SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS _q_0; +SELECT "_0".a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS "_0"; SELECT a FROM (SELECT * FROM (SELECT * FROM aa)); -SELECT _q_1.a AS a FROM (SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS _q_0) AS _q_1; +SELECT "_1".a AS a FROM (SELECT "_0".a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS "_0") AS "_1"; with cte1 as (SELECT cola, colb FROM tb UNION ALL SELECT colc, cold FROM tb2) SELECT cola FROM cte1; WITH cte1 AS (SELECT 
tb.cola AS cola FROM tb AS tb UNION ALL SELECT tb2.colc AS colc FROM tb2 AS tb2) SELECT cte1.cola AS cola FROM cte1 AS cte1; SELECT * FROM ((SELECT c FROM t1) JOIN t2); -SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0, t2 AS t2); +SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS "_0", t2 AS t2); SELECT a, d FROM (SELECT 1 a, 2 c, 3 d, 4 e UNION ALL BY NAME SELECT 6 c, 7 d, 8 a, 9 e); -SELECT _q_0.a AS a, _q_0.d AS d FROM (SELECT 1 AS a, 3 AS d UNION ALL BY NAME SELECT 7 AS d, 8 AS a) AS _q_0; +SELECT "_0".a AS a, "_0".d AS d FROM (SELECT 1 AS a, 3 AS d UNION ALL BY NAME SELECT 7 AS d, 8 AS a) AS "_0"; SELECT a, b FROM (WITH cte1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d) (SELECT a, b, c FROM cte1)); -SELECT _q_0.a AS a, _q_0.b AS b FROM (WITH cte1 AS (SELECT 1 AS a, 2 AS b) SELECT cte1.a AS a, cte1.b AS b FROM cte1 AS cte1) AS _q_0; \ No newline at end of file +SELECT "_0".a AS a, "_0".b AS b FROM (WITH cte1 AS (SELECT 1 AS a, 2 AS b) SELECT cte1.a AS a, cte1.b AS b FROM cte1 AS cte1) AS "_0"; \ No newline at end of file diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql index 6676812a77..9e1a8fe922 100644 --- a/tests/fixtures/optimizer/qualify_columns.sql +++ b/tests/fixtures/optimizer/qualify_columns.sql @@ -129,7 +129,7 @@ SELECT DATE(x.a) AS _col_0, DATE(x.b) AS c FROM x AS x GROUP BY DATE(x.a), DATE( # execute: false SELECT (SELECT MIN(a) FROM UNNEST([1, 2])) AS f FROM x GROUP BY 1; -SELECT (SELECT MIN(_q_0.a) AS _col_0 FROM UNNEST(ARRAY(1, 2)) AS _q_0) AS f FROM x AS x GROUP BY 1; +SELECT (SELECT MIN(_0.a) AS _col_0 FROM UNNEST(ARRAY(1, 2)) AS _0) AS f FROM x AS x GROUP BY 1; # dialect: bigquery WITH x AS (select 'a' as a, 1 as b) SELECT x.a AS c, y.a as d, SUM(x.b) AS y, FROM x join x as y on x.a = y.a group by 1, 2; @@ -176,11 +176,11 @@ SELECT DATE_TRUNC(x.a, MONTH) AS a FROM x AS x; # execute: false SELECT x FROM READ_PARQUET('path.parquet', hive_partition=1); -SELECT _q_0.x AS x FROM 
READ_PARQUET('path.parquet', hive_partition = 1) AS _q_0; +SELECT _0.x AS x FROM READ_PARQUET('path.parquet', hive_partition = 1) AS _0; # execute: false select * from (values (1, 2)); -SELECT _q_0._col_0 AS _col_0, _q_0._col_1 AS _col_1 FROM (VALUES (1, 2)) AS _q_0(_col_0, _col_1); +SELECT _0._col_0 AS _col_0, _0._col_1 AS _col_1 FROM (VALUES (1, 2)) AS _0(_col_0, _col_1); # execute: false select * from (values (1, 2)) x; @@ -267,7 +267,7 @@ SELECT g.generate_series AS generate_series FROM generate_series(0, 10) AS g(gen # execute: false # dialect: snowflake SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (ANY ORDER BY quarter)) ORDER BY empid; -SELECT * FROM QUARTERLY_SALES AS QUARTERLY_SALES PIVOT(SUM(QUARTERLY_SALES.AMOUNT) FOR QUARTERLY_SALES.QUARTER IN (ANY ORDER BY QUARTER)) AS _Q_0 ORDER BY _Q_0.EMPID; +SELECT * FROM QUARTERLY_SALES AS QUARTERLY_SALES PIVOT(SUM(QUARTERLY_SALES.AMOUNT) FOR QUARTERLY_SALES.QUARTER IN (ANY ORDER BY QUARTER)) AS _0 ORDER BY _0.EMPID; # execute: false SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) AS x FROM t; @@ -303,10 +303,10 @@ SELECT a FROM (SELECT a FROM x AS x) y; SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y; SELECT a FROM (SELECT a AS a FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT a FROM (SELECT a FROM (SELECT a FROM x)); -SELECT _q_1.a AS a FROM (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) AS _q_1; +SELECT _1.a AS a FROM (SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0) AS _1; SELECT x.a FROM x AS x JOIN (SELECT * FROM x) AS y ON x.a = y.a; SELECT x.a AS a FROM x AS x JOIN (SELECT x.a AS a, x.b AS b FROM x AS x) AS y ON x.a = y.a; @@ -333,7 +333,7 @@ SELECT a FROM x UNION SELECT a FROM x UNION SELECT a FROM x ORDER BY a; SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x ORDER BY a; SELECT a FROM (SELECT a FROM x UNION 
SELECT a FROM x) ORDER BY a; -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x) AS _q_0 ORDER BY a; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x) AS _0 ORDER BY a; # title: nested subqueries in union ((select a from x where a < 1)) UNION ((select a from x where a > 2)); @@ -343,79 +343,79 @@ SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar UNION ALL CORRESPONDING SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar, _q_0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar, _0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # 
execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL CORRESPONDING SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL CORRESPONDING SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar, _q_0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar, _0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL CORRESPONDING BY (foo, bar) SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) LEFT UNION ALL BY NAME ON (bar) SELECT 3 AS foo, 4 AS bar); -SELECT _q_0.bar AS bar FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) LEFT UNION ALL BY NAME ON (bar) SELECT 3 AS 
foo, 4 AS bar) AS _q_0; +SELECT _0.bar AS bar FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) LEFT UNION ALL BY NAME ON (bar) SELECT 3 AS foo, 4 AS bar) AS _0; # dialect: bigquery # execute: false SELECT * FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar); -SELECT _q_0.foo AS foo, _q_0.qux AS qux FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) AS _q_0; +SELECT _0.foo AS foo, _0.qux AS qux FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) AS _0; # dialect: bigquery # execute: false SELECT * FROM (((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) INNER UNION ALL BY NAME ON (foo) SELECT 6 AS foo); -SELECT _q_0.foo AS foo FROM (((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) INNER UNION ALL BY NAME ON (foo) SELECT 6 AS foo) AS _q_0; +SELECT _0.foo AS foo FROM (((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) INNER UNION ALL BY NAME ON (foo) SELECT 6 AS foo) AS _0; # Title: Nested set operations with modifiers # dialect: bigquery # execute: false WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT * FROM ((SELECT * FROM t1 FULL OUTER UNION ALL BY NAME (SELECT * FROM t2 FULL OUTER UNION ALL BY NAME (SELECT * FROM t3 FULL OUTER UNION ALL BY NAME SELECT * FROM t4)))); -WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) 
SELECT _q_0.a AS a, _q_0.b AS b, _q_0.c AS c, _q_0.d AS d, _q_0.e AS e, _q_0.f AS f FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL OUTER UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _q_0); +WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT _0.a AS a, _0.b AS b, _0.c AS c, _0.d AS d, _0.e AS e, _0.f AS f FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL OUTER UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _0); # Title: Nested set operations with different modifiers (FULL + INNER) # dialect: bigquery # execute: false WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT * FROM ((SELECT * FROM t1 FULL OUTER UNION ALL BY NAME (SELECT * FROM t2 INNER UNION ALL BY NAME (SELECT * FROM t3 FULL OUTER UNION ALL BY NAME SELECT * FROM t4)))); -WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT _q_0.a AS a, _q_0.b AS b, _q_0.c AS c FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 INNER UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _q_0); +WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT _0.a AS a, _0.b AS b, _0.c AS c FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 INNER UNION ALL BY 
NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _0); # Title: Nested set operations with different modifiers (FULL + LEFT) # dialect: bigquery # execute: false WITH t1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS d, 3 AS e) SELECT * FROM ((SELECT * FROM t1 FULL OUTER UNION ALL BY NAME (SELECT * FROM t2 FULL UNION ALL BY NAME (SELECT * FROM t3 LEFT UNION ALL BY NAME SELECT * FROM t4)))); -WITH t1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS d, 3 AS e) SELECT _q_0.a AS a, _q_0.b AS b, _q_0.c AS c, _q_0.d AS d FROM ((SELECT t1.a AS a, t1.b AS b, t1.c AS c, t1.d AS d FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 LEFT UNION ALL BY NAME SELECT t4.d AS d, t4.e AS e FROM t4 AS t4))) AS _q_0); +WITH t1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS d, 3 AS e) SELECT _0.a AS a, _0.b AS b, _0.c AS c, _0.d AS d FROM ((SELECT t1.a AS a, t1.b AS b, t1.c AS c, t1.d AS d FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 LEFT UNION ALL BY NAME SELECT t4.d AS d, t4.e AS e FROM t4 AS t4))) AS _0); -------------------------------------- -- Subqueries @@ -436,7 +436,7 @@ WITH t1(c1) AS (SELECT 1), t2(c2) AS (SELECT 2) SELECT (SELECT c1 FROM t2) FROM WITH t1 AS (SELECT 1 AS c1), t2 AS (SELECT 2 AS c2) SELECT (SELECT t1.c1 AS c1 FROM t2 AS t2) AS _col_0 FROM t1 AS t1; SELECT a FROM (SELECT a FROM x) WHERE a IN (SELECT b FROM (SELECT b FROM y)); -SELECT _q_1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_1 WHERE _q_1.a IN (SELECT _q_0.b AS b FROM (SELECT y.b AS b FROM y AS 
y) AS _q_0); +SELECT _1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _1 WHERE _1.a IN (SELECT _0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _0); # dialect: mysql # execute: false @@ -485,10 +485,10 @@ SELECT x.*, y.* FROM x JOIN y ON x.b = y.b; SELECT x.a AS a, x.b AS b, y.b AS b, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; SELECT a FROM (SELECT * FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x) AS _0; SELECT * FROM (SELECT a FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT * FROM x GROUP BY 1, 2; SELECT x.a AS a, x.b AS b FROM x AS x GROUP BY x.a, x.b; @@ -565,30 +565,30 @@ SELECT ((structs.nested_0).nested_1).a_2 AS a_2, ((structs.nested_0).nested_1).n # title: CSV files are not scanned by default # execute: false SELECT * FROM READ_CSV('file.csv'); -SELECT * FROM READ_CSV('file.csv') AS _q_0; +SELECT * FROM READ_CSV('file.csv') AS _0; # dialect: clickhouse # Title: Expand tuples in VALUES using the structure provided # execute: false SELECT * FROM VALUES ('person String, place String', ('Noah', 'Paris')); -SELECT _q_0.person AS person, _q_0.place AS place FROM VALUES ('person String, place String', ('Noah', 'Paris')) AS _q_0(person, place); +SELECT _0.person AS person, _0.place AS place FROM VALUES ('person String, place String', ('Noah', 'Paris')) AS _0(person, place); # dialect: clickhouse # Title: Expand tuples in VALUES using the default naming scheme in CH # execute: false SELECT * FROM VALUES ((1, 1), (2, 2)); -SELECT _q_0.c1 AS c1, _q_0.c2 AS c2 FROM VALUES ((1, 1), (2, 2)) AS _q_0(c1, c2); +SELECT _0.c1 AS c1, _0.c2 AS c2 FROM VALUES ((1, 1), (2, 2)) AS _0(c1, c2); # dialect: clickhouse # Title: Expand fields in VALUES using the default naming scheme in CH # execute: false SELECT * FROM VALUES (1, 2, 3); -SELECT _q_0.c1 AS c1 FROM VALUES ((1), (2), (3)) AS 
_q_0(c1); +SELECT _0.c1 AS c1 FROM VALUES ((1), (2), (3)) AS _0(c1); # title: Expand PIVOT column combinations # dialect: duckdb WITH cities AS (SELECT * FROM (VALUES ('nl', 'amsterdam', 2000, 1005)) AS t(country, name, year, population)) SELECT * FROM cities PIVOT(SUM(population) AS total, COUNT(population) AS count FOR country IN ('nl', 'us') year IN (2000, 2010) name IN ('amsterdam', 'seattle')); -WITH cities AS (SELECT t.country AS country, t.name AS name, t.year AS year, t.population AS population FROM (VALUES ('nl', 'amsterdam', 2000, 1005)) AS t(country, name, year, population)) SELECT _q_0.nl_2000_amsterdam_total AS nl_2000_amsterdam_total, _q_0.nl_2000_amsterdam_count AS nl_2000_amsterdam_count, _q_0.nl_2000_seattle_total AS nl_2000_seattle_total, _q_0.nl_2000_seattle_count AS nl_2000_seattle_count, _q_0.nl_2010_amsterdam_total AS nl_2010_amsterdam_total, _q_0.nl_2010_amsterdam_count AS nl_2010_amsterdam_count, _q_0.nl_2010_seattle_total AS nl_2010_seattle_total, _q_0.nl_2010_seattle_count AS nl_2010_seattle_count, _q_0.us_2000_amsterdam_total AS us_2000_amsterdam_total, _q_0.us_2000_amsterdam_count AS us_2000_amsterdam_count, _q_0.us_2000_seattle_total AS us_2000_seattle_total, _q_0.us_2000_seattle_count AS us_2000_seattle_count, _q_0.us_2010_amsterdam_total AS us_2010_amsterdam_total, _q_0.us_2010_amsterdam_count AS us_2010_amsterdam_count, _q_0.us_2010_seattle_total AS us_2010_seattle_total, _q_0.us_2010_seattle_count AS us_2010_seattle_count FROM cities AS cities PIVOT(SUM(population) AS total, COUNT(population) AS count FOR country IN ('nl', 'us') year IN (2000, 2010) name IN ('amsterdam', 'seattle')) AS _q_0; +WITH cities AS (SELECT t.country AS country, t.name AS name, t.year AS year, t.population AS population FROM (VALUES ('nl', 'amsterdam', 2000, 1005)) AS t(country, name, year, population)) SELECT _0.nl_2000_amsterdam_total AS nl_2000_amsterdam_total, _0.nl_2000_amsterdam_count AS nl_2000_amsterdam_count, _0.nl_2000_seattle_total AS 
nl_2000_seattle_total, _0.nl_2000_seattle_count AS nl_2000_seattle_count, _0.nl_2010_amsterdam_total AS nl_2010_amsterdam_total, _0.nl_2010_amsterdam_count AS nl_2010_amsterdam_count, _0.nl_2010_seattle_total AS nl_2010_seattle_total, _0.nl_2010_seattle_count AS nl_2010_seattle_count, _0.us_2000_amsterdam_total AS us_2000_amsterdam_total, _0.us_2000_amsterdam_count AS us_2000_amsterdam_count, _0.us_2000_seattle_total AS us_2000_seattle_total, _0.us_2000_seattle_count AS us_2000_seattle_count, _0.us_2010_amsterdam_total AS us_2010_amsterdam_total, _0.us_2010_amsterdam_count AS us_2010_amsterdam_count, _0.us_2010_seattle_total AS us_2010_seattle_total, _0.us_2010_seattle_count AS us_2010_seattle_count FROM cities AS cities PIVOT(SUM(population) AS total, COUNT(population) AS count FOR country IN ('nl', 'us') year IN (2000, 2010) name IN ('amsterdam', 'seattle')) AS _0; -------------------------------------- -- CTEs @@ -787,11 +787,11 @@ SELECT /*+ BROADCAST(y) */ x.b AS b FROM x AS x JOIN y AS y ON x.b = y.b; -------------------------------------- # execute: false SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; -SELECT _q_0.c AS c FROM x AS x LATERAL VIEW EXPLODE(x.a) _q_0 AS c; +SELECT _0.c AS c FROM x AS x LATERAL VIEW EXPLODE(x.a) _0 AS c; # execute: false SELECT c FROM xx LATERAL VIEW EXPLODE (a) AS c; -SELECT _q_0.c AS c FROM xx AS xx LATERAL VIEW EXPLODE(xx.a) _q_0 AS c; +SELECT _0.c AS c FROM xx AS xx LATERAL VIEW EXPLODE(xx.a) _0 AS c; # execute: false SELECT c FROM x LATERAL VIEW EXPLODE (a) t AS c; @@ -832,7 +832,7 @@ SELECT x.a AS a, i.b AS b FROM x AS x CROSS JOIN UNNEST(SPLIT(CAST(x.b AS VARCHA # execute: false SELECT c FROM (SELECT 1 a) AS x LATERAL VIEW EXPLODE(a) AS c; -SELECT _q_0.c AS c FROM (SELECT 1 AS a) AS x LATERAL VIEW EXPLODE(x.a) _q_0 AS c; +SELECT _0.c AS c FROM (SELECT 1 AS a) AS x LATERAL VIEW EXPLODE(x.a) _0 AS c; # execute: false SELECT * FROM foo(bar) AS t(c1, c2, c3); @@ -908,7 +908,7 @@ FROM ( i + 1 AS j FROM x ); -SELECT _q_0.i AS i, 
_q_0.j AS j FROM (SELECT x.a + 1 AS i, x.a + 1 + 1 AS j FROM x AS x) AS _q_0; +SELECT _0.i AS i, _0.j AS j FROM (SELECT x.a + 1 AS i, x.a + 1 + 1 AS j FROM x AS x) AS _0; # title: wrap expanded alias to ensure operator precedence isnt broken # execute: false @@ -956,7 +956,7 @@ WITH RECURSIVE rec AS (SELECT id, parent_id AS parent, 1 AS level FROM (SELECT 1 WITH RECURSIVE rec AS (SELECT t.id AS id, t.parent_id AS parent, 1 AS level FROM (SELECT 1 AS id, 0 AS parent_id) AS t WHERE t.parent_id = 0 UNION ALL SELECT s.num AS num, s.val AS x, 2 AS level FROM (SELECT 2 AS num, 1 AS val) AS s WHERE s.val = 1 UNION ALL SELECT rec.id + 10 AS id, rec.id AS parent, rec.level + 1 AS level FROM rec AS rec WHERE rec.level < 3) SELECT rec.id AS id, rec.parent AS parent, rec.level AS level FROM rec AS rec ORDER BY rec.id; WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT * FROM (SELECT c + 1 AS c FROM t WHERE c <= 3 UNION ALL SELECT c + 2 AS c FROM t WHERE c <= 3)) SELECT c FROM t ORDER BY c; -WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT _q_0.c AS c FROM (SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 3 UNION ALL SELECT t.c + 2 AS c FROM t AS t WHERE t.c <= 3) AS _q_0) SELECT t.c AS c FROM t AS t ORDER BY c; +WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT _0.c AS c FROM (SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 3 UNION ALL SELECT t.c + 2 AS c FROM t AS t WHERE t.c <= 3) AS _0) SELECT t.c AS c FROM t AS t ORDER BY c; -------------------------------------- -- Wrapped tables / join constructs @@ -974,24 +974,24 @@ SELECT * FROM ((a AS a CROSS JOIN ((b AS b CROSS JOIN c AS c) CROSS JOIN (d AS d # execute: false SELECT * FROM ((SELECT * FROM tbl)); -SELECT * FROM ((SELECT * FROM tbl AS tbl) AS _q_0); +SELECT * FROM ((SELECT * FROM tbl AS tbl) AS _0); # execute: false SELECT * FROM ((SELECT c FROM t1) CROSS JOIN t2); -SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0 CROSS JOIN t2 AS t2); +SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _0 CROSS JOIN t2 
AS t2); # execute: false SELECT * FROM ((SELECT * FROM x) INNER JOIN y ON a = c); -SELECT y.b AS b, y.c AS c, _q_0.a AS a, _q_0.b AS b FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0 INNER JOIN y AS y ON _q_0.a = y.c); +SELECT y.b AS b, y.c AS c, _0.a AS a, _0.b AS b FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _0 INNER JOIN y AS y ON _0.a = y.c); SELECT x.a, y.b, z.c FROM x LEFT JOIN (y INNER JOIN z ON y.c = z.c) ON x.b = y.b; SELECT x.a AS a, y.b AS b, z.c AS c FROM x AS x LEFT JOIN (y AS y INNER JOIN z AS z ON y.c = z.c) ON x.b = y.b; SELECT * FROM ((SELECT * FROM x) INNER JOIN (SELECT * FROM y) ON a = c); -SELECT _q_0.a AS a, _q_0.b AS b, _q_1.b AS b, _q_1.c AS c FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0 INNER JOIN (SELECT y.b AS b, y.c AS c FROM y AS y) AS _q_1 ON _q_0.a = _q_1.c); +SELECT _0.a AS a, _0.b AS b, _1.b AS b, _1.c AS c FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _0 INNER JOIN (SELECT y.b AS b, y.c AS c FROM y AS y) AS _1 ON _0.a = _1.c); SELECT b FROM ((SELECT a FROM x) INNER JOIN y ON a = b); -SELECT y.b AS b FROM ((SELECT x.a AS a FROM x AS x) AS _q_0 INNER JOIN y AS y ON _q_0.a = y.b); +SELECT y.b AS b FROM ((SELECT x.a AS a FROM x AS x) AS _0 INNER JOIN y AS y ON _0.a = y.b); SELECT a, c FROM x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y TABLESAMPLE SYSTEM (10 ROWS); SELECT x.a AS a, y.c AS c FROM x AS x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y AS y TABLESAMPLE SYSTEM (10 ROWS); diff --git a/tests/fixtures/optimizer/qualify_columns__with_invisible.sql b/tests/fixtures/optimizer/qualify_columns__with_invisible.sql index 1e070158e2..a65b1834bb 100644 --- a/tests/fixtures/optimizer/qualify_columns__with_invisible.sql +++ b/tests/fixtures/optimizer/qualify_columns__with_invisible.sql @@ -11,10 +11,10 @@ SELECT x.b AS b FROM x AS x; -- Derived tables -------------------------------------- SELECT x.a FROM x AS x CROSS JOIN (SELECT * FROM x); -SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT x.a AS a FROM x AS x) AS _q_0; 
+SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT x.a AS a FROM x AS x) AS _0; SELECT x.b FROM x AS x CROSS JOIN (SELECT b FROM x); -SELECT x.b AS b FROM x AS x CROSS JOIN (SELECT x.b AS b FROM x AS x) AS _q_0; +SELECT x.b AS b FROM x AS x CROSS JOIN (SELECT x.b AS b FROM x AS x) AS _0; -------------------------------------- -- Expand * @@ -29,7 +29,7 @@ SELECT * FROM y CROSS JOIN z ON y.c = z.c; SELECT y.b AS b, z.b AS b FROM y AS y CROSS JOIN z AS z ON y.c = z.c; SELECT a FROM (SELECT * FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT * FROM (SELECT a FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; diff --git a/tests/fixtures/optimizer/qualify_columns_ddl.sql b/tests/fixtures/optimizer/qualify_columns_ddl.sql index 75d84ca2ff..7b38b8eb86 100644 --- a/tests/fixtures/optimizer/qualify_columns_ddl.sql +++ b/tests/fixtures/optimizer/qualify_columns_ddl.sql @@ -12,7 +12,7 @@ CREATE TABLE foo AS SELECT tbl.a AS a FROM tbl AS tbl; # title: Create with complex CTE with derived table WITH cte AS (SELECT a FROM (SELECT a FROM x)) CREATE TABLE s AS SELECT * FROM cte; -WITH cte AS (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) CREATE TABLE s AS SELECT cte.a AS a FROM cte AS cte; +WITH cte AS (SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0) CREATE TABLE s AS SELECT cte.a AS a FROM cte AS cte; # title: Create wtih multiple CTEs WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte2; @@ -24,11 +24,11 @@ WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte # title: Create with multiple derived tables CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM y)); -CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _q_0) AS _q_1; +CREATE TABLE s AS SELECT _1.b 
AS b FROM (SELECT _0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _0) AS _1; # title: Create with a CTE and a derived table WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM cte)); -WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT cte.b AS b FROM cte AS cte) AS _q_0) AS _q_1; +WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT _1.b AS b FROM (SELECT _0.b AS b FROM (SELECT cte.b AS b FROM cte AS cte) AS _0) AS _1; # title: Insert with CTE # dialect: spark diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql index 662f7c37c6..aee131ae1f 100644 --- a/tests/fixtures/optimizer/qualify_tables.sql +++ b/tests/fixtures/optimizer/qualify_tables.sql @@ -68,7 +68,7 @@ SELECT (SELECT y.c FROM c.db.y AS y) FROM c.db.x AS x; # title: pivoted table SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b')); -SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS _q_0; +SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS "_0"; # title: pivoted table, pivot has alias SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b')) AS piv; @@ -135,28 +135,28 @@ SELECT t.a FROM (tbl AS tbl) AS t; SELECT t.a FROM (SELECT * FROM c.db.tbl AS tbl) AS t; # title: wrapped aliased table with outer alias -SELECT * FROM ((((tbl AS tbl)))) AS _q_0; -SELECT * FROM (SELECT * FROM c.db.tbl AS tbl) AS _q_0; +SELECT * FROM ((((tbl AS tbl)))) AS "_0"; +SELECT * FROM (SELECT * FROM c.db.tbl AS tbl) AS "_0"; # title: join construct with three tables -SELECT * FROM (tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = id3) AS _q_0; -SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = id3) AS _q_0; +SELECT * FROM (tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = id3) AS "_0"; +SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN 
c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = id3) AS "_0"; # title: join construct with three tables and redundant set of parentheses -SELECT * FROM ((tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = id3)) AS _q_0; -SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = id3) AS _q_0; +SELECT * FROM ((tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = id3)) AS "_0"; +SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = id3) AS "_0"; # title: join construct within join construct -SELECT * FROM (tbl1 AS tbl1 JOIN (tbl2 AS tbl2 JOIN tbl3 AS tbl3 ON id2 = id3) AS _q_0 ON id1 = id3) AS _q_1; -SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN (SELECT * FROM c.db.tbl2 AS tbl2 JOIN c.db.tbl3 AS tbl3 ON id2 = id3) AS _q_0 ON id1 = id3) AS _q_1; +SELECT * FROM (tbl1 AS tbl1 JOIN (tbl2 AS tbl2 JOIN tbl3 AS tbl3 ON id2 = id3) AS "_0" ON id1 = id3) AS "_1"; +SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN (SELECT * FROM c.db.tbl2 AS tbl2 JOIN c.db.tbl3 AS tbl3 ON id2 = id3) AS "_0" ON id1 = id3) AS "_1"; # title: wrapped subquery without alias SELECT * FROM ((SELECT * FROM t)); -SELECT * FROM ((SELECT * FROM c.db.t AS t) AS _q_0); +SELECT * FROM ((SELECT * FROM c.db.t AS t) AS "_0"); # title: wrapped subquery without alias joined with a table SELECT * FROM ((SELECT * FROM t1) INNER JOIN t2 ON a = b); -SELECT * FROM ((SELECT * FROM c.db.t1 AS t1) AS _q_0 INNER JOIN c.db.t2 AS t2 ON a = b); +SELECT * FROM ((SELECT * FROM c.db.t1 AS t1) AS "_0" INNER JOIN c.db.t2 AS t2 ON a = b); # title: lateral unnest with alias SELECT x FROM t, LATERAL UNNEST(t.xs) AS x; @@ -164,7 +164,7 @@ SELECT x FROM c.db.t AS t, LATERAL UNNEST(t.xs) AS x; # title: lateral unnest without alias SELECT x FROM t, LATERAL UNNEST(t.xs); -SELECT x FROM c.db.t AS t, LATERAL UNNEST(t.xs) AS _q_0; +SELECT x FROM c.db.t AS t, LATERAL 
UNNEST(t.xs) AS "_0"; # title: table with ordinality SELECT * FROM t CROSS JOIN JSON_ARRAY_ELEMENTS(t.response) WITH ORDINALITY AS kv_json; diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql index 04648cfadd..d14ad46830 100644 --- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql +++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql @@ -2539,7 +2539,7 @@ WITH "item_2" AS ( "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id" FROM "item" AS "item" -), "_q_0" AS ( +), "_0" AS ( SELECT "iss"."i_brand_id" AS "brand_id", "iss"."i_class_id" AS "class_id", @@ -2619,10 +2619,10 @@ WITH "item_2" AS ( SELECT "item"."i_item_sk" AS "ss_item_sk" FROM "item_2" AS "item" - JOIN "_q_0" AS "_q_0" - ON "_q_0"."brand_id" = "item"."i_brand_id" - AND "_q_0"."category_id" = "item"."i_category_id" - AND "_q_0"."class_id" = "item"."i_class_id" + JOIN "_0" AS "_0" + ON "_0"."brand_id" = "item"."i_brand_id" + AND "_0"."category_id" = "item"."i_category_id" + AND "_0"."class_id" = "item"."i_class_id" GROUP BY "item"."i_item_sk" ), "_u_1" AS ( @@ -3394,7 +3394,7 @@ WITH "frequent_ss_items" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk" FROM "customer" AS "customer" -), "_q_0" AS ( +), "_0" AS ( SELECT SUM("store_sales"."ss_quantity" * "store_sales"."ss_sales_price") AS "csales" FROM "store_sales" AS "store_sales" @@ -3407,8 +3407,8 @@ WITH "frequent_ss_items" AS ( "customer"."c_customer_sk" ), "max_store_sales" AS ( SELECT - MAX("_q_0"."csales") AS "tpcds_cmax" - FROM "_q_0" AS "_q_0" + MAX("_0"."csales") AS "tpcds_cmax" + FROM "_0" AS "_0" ), "best_ss_customer" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk" @@ -3440,7 +3440,7 @@ WITH "frequent_ss_items" AS ( FROM "best_ss_customer" AS "best_ss_customer" GROUP BY "best_ss_customer"."c_customer_sk" -), "_q_1" AS ( +), "_1" AS ( SELECT "catalog_sales"."cs_quantity" * "catalog_sales"."cs_list_price" AS "sales" FROM "catalog_sales" AS "catalog_sales" @@ 
-3466,8 +3466,8 @@ WITH "frequent_ss_items" AS ( NOT "_u_3"."item_sk" IS NULL AND NOT "_u_4"."c_customer_sk" IS NULL ) SELECT - SUM("_q_1"."sales") AS "_col_0" -FROM "_q_1" AS "_q_1" + SUM("_1"."sales") AS "_col_0" +FROM "_1" AS "_1" LIMIT 100; -------------------------------------- diff --git a/tests/test_lineage.py b/tests/test_lineage.py index cad7704fb4..f7e43ecc6f 100644 --- a/tests/test_lineage.py +++ b/tests/test_lineage.py @@ -413,8 +413,8 @@ def test_subquery(self) -> None: self.assertEqual(node.name, "a") self.assertEqual(len(node.downstream), 1) node = node.downstream[0] - self.assertEqual(node.name, "_q_0.a") - self.assertEqual(node.reference_node_name, "_q_0") + self.assertEqual(node.name, "_0.a") + self.assertEqual(node.reference_node_name, "_0") def test_lineage_cte_union(self) -> None: query = """ @@ -480,7 +480,7 @@ def test_select_star(self) -> None: self.assertEqual(node.name, "x") downstream = node.downstream[0] - self.assertEqual(downstream.name, "_q_0.x") + self.assertEqual(downstream.name, "_0.x") self.assertEqual(downstream.source.sql(), "SELECT * FROM table_a AS table_a") downstream = downstream.downstream[0] @@ -565,7 +565,7 @@ def test_pivot_without_alias(self) -> None: """ node = lineage("other_a", sql) - self.assertEqual(node.downstream[0].name, "_q_0.value") + self.assertEqual(node.downstream[0].name, "_0.value") self.assertEqual(node.downstream[0].downstream[0].name, "sample_data.value") def test_pivot_with_alias(self) -> None: @@ -705,7 +705,7 @@ def test_pivot_with_subquery(self) -> None: node = lineage("product_type", sql, dialect="duckdb", schema=schema) self.assertEqual(node.downstream[0].name, "cte.product_type") - self.assertEqual(node.downstream[0].downstream[0].name, "_q_0.product_type") + self.assertEqual(node.downstream[0].downstream[0].name, "_0.product_type") self.assertEqual( node.downstream[0].downstream[0].downstream[0].name, "loan_ledger.product_type", @@ -713,7 +713,7 @@ def test_pivot_with_subquery(self) -> None: 
node = lineage('"2024-10"', sql, dialect="duckdb", schema=schema) self.assertEqual(node.downstream[0].name, "cte.2024-10") - self.assertEqual(node.downstream[0].downstream[0].name, "_q_0.loan_id") + self.assertEqual(node.downstream[0].downstream[0].name, "_0.loan_id") self.assertEqual(node.downstream[0].downstream[0].downstream[0].name, "loan_ledger.loan_id") def test_copy_flag(self) -> None: diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index a6c2d2f454..453d0e8e49 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -245,9 +245,9 @@ def test_qualify_tables(self): parse_one("SELECT * FROM t"), db="db", catalog="c", - canonicalize=True, + canonicalize_table_aliases=True, ).sql(), - "SELECT * FROM c.db.t AS _0", + 'SELECT * FROM c.db.t AS "_0"', ) self.assertEqual( @@ -255,18 +255,18 @@ def test_qualify_tables(self): parse_one("SELECT * FROM t1 JOIN t2 ON t1.id = t2.id"), db="db", catalog="c", - canonicalize=True, + canonicalize_table_aliases=True, ).sql(), - "SELECT * FROM c.db.t1 AS _0 JOIN c.db.t2 AS _1 ON _0.id = _1.id", + 'SELECT * FROM c.db.t1 AS "_0" JOIN c.db.t2 AS "_1" ON _0.id = _1.id', ) self.assertEqual( optimizer.qualify_tables.qualify_tables( parse_one("SELECT * FROM db1.users JOIN db2.users ON db1.users.id = db2.users.id"), catalog="c", - canonicalize=True, + canonicalize_table_aliases=True, ).sql(), - "SELECT * FROM c.db1.users AS _0 JOIN c.db2.users AS _1 ON _0.id = _1.id", + 'SELECT * FROM c.db1.users AS "_0" JOIN c.db2.users AS "_1" ON _0.id = _1.id', ) self.assertEqual( @@ -274,9 +274,9 @@ def test_qualify_tables(self): parse_one("WITH cte AS (SELECT * FROM t) SELECT * FROM cte"), db="db", catalog="c", - canonicalize=True, + canonicalize_table_aliases=True, ).sql(), - "WITH cte AS (SELECT * FROM c.db.t AS _0) SELECT * FROM cte AS _1", + 'WITH cte AS (SELECT * FROM c.db.t AS "_0") SELECT * FROM cte AS "_1"', ) self.assertEqual( @@ -284,9 +284,9 @@ def test_qualify_tables(self): parse_one("SELECT * FROM (SELECT * 
FROM t)"), db="db", catalog="c", - canonicalize=True, + canonicalize_table_aliases=True, ).sql(), - "SELECT * FROM (SELECT * FROM c.db.t AS _0) AS _1", + 'SELECT * FROM (SELECT * FROM c.db.t AS "_0") AS "_1"', ) self.assertEqual( @@ -294,9 +294,9 @@ def test_qualify_tables(self): parse_one("SELECT * FROM t1, (SELECT * FROM t2) AS sub, t3"), db="db", catalog="c", - canonicalize=True, + canonicalize_table_aliases=True, ).sql(), - "SELECT * FROM c.db.t1 AS _2, (SELECT * FROM c.db.t2 AS _0) AS _1, c.db.t3 AS _3", + 'SELECT * FROM c.db.t1 AS "_2", (SELECT * FROM c.db.t2 AS "_0") AS "_1", c.db.t3 AS "_3"', ) self.assertEqual( @@ -306,9 +306,9 @@ def test_qualify_tables(self): ), db="db", catalog="c", - canonicalize=True, + canonicalize_table_aliases=True, ).sql(), - "WITH cte AS (SELECT * FROM c.db.t AS _0) SELECT * FROM cte AS _1 PIVOT(SUM(c) FOR v IN ('x', 'y')) AS _2", + 'WITH cte AS (SELECT * FROM c.db.t AS "_0") SELECT * FROM cte AS "_1" PIVOT(SUM(c) FOR v IN (\'x\', \'y\')) AS "_2"', ) self.assertEqual( @@ -330,7 +330,7 @@ def test_qualify_tables(self): db="db", catalog="catalog", ).sql(), - "WITH cte AS (SELECT * FROM catalog.db.t AS t) SELECT * FROM cte AS cte PIVOT(SUM(c) FOR v IN ('x', 'y')) AS _q_0", + "WITH cte AS (SELECT * FROM catalog.db.t AS t) SELECT * FROM cte AS cte PIVOT(SUM(c) FOR v IN ('x', 'y')) AS \"_0\"", ) self.assertEqual( @@ -505,7 +505,7 @@ def test_qualify_columns(self, logger): self.assertEqual( optimizer.qualify_columns.qualify_columns( parse_one( - "SELECT id, dt, v FROM (SELECT t1.id, t1.dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp USING (id) LEFT JOIN t2 AS t2 USING (other_id, dt, common) WHERE t1.id > 10 GROUP BY 1, 2) AS _q_0", + "SELECT id, dt, v FROM (SELECT t1.id, t1.dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp USING (id) LEFT JOIN t2 AS t2 USING (other_id, dt, common) WHERE t1.id > 10 GROUP BY 1, 2) AS `_0`", dialect="bigquery", ), schema=MappingSchema( @@ -517,7 +517,7 @@ def 
test_qualify_columns(self, logger): dialect="bigquery", ), ).sql(dialect="bigquery"), - "SELECT _q_0.id AS id, _q_0.dt AS dt, _q_0.v AS v FROM (SELECT t1.id AS id, t1.dt AS dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp ON t1.id = lkp.id LEFT JOIN t2 AS t2 ON lkp.other_id = t2.other_id AND t1.dt = t2.dt AND COALESCE(t1.common, lkp.common) = t2.common WHERE t1.id > 10 GROUP BY t1.id, t1.dt) AS _q_0", + "SELECT `_0`.id AS id, `_0`.dt AS dt, `_0`.v AS v FROM (SELECT t1.id AS id, t1.dt AS dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp ON t1.id = lkp.id LEFT JOIN t2 AS t2 ON lkp.other_id = t2.other_id AND t1.dt = t2.dt AND COALESCE(t1.common, lkp.common) = t2.common WHERE t1.id > 10 GROUP BY t1.id, t1.dt) AS `_0`", ) # Detection of correlation where columns are referenced in derived tables nested within subqueries From fe061e55e67d965c7b7284b3e47a2cd7758c4113 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Fri, 21 Nov 2025 18:08:54 +0200 Subject: [PATCH 3/7] Remove pass --- sqlglot/optimizer/qualify_tables.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index aa38c6cd0c..1056bb9ed5 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -181,6 +181,5 @@ def _set_alias( ): # Amend existing aliases, e.g. 
t.c -> _0.c if t is aliased to _0 column.set("table", exp.to_identifier(canonical_table)) - pass return expression From 42a4b69558a5ce01e7e89d48e62886072e380736 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Fri, 21 Nov 2025 19:56:54 +0200 Subject: [PATCH 4/7] Small cleanup --- sqlglot/optimizer/qualify_tables.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index 1056bb9ed5..3c52fa78a7 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -84,17 +84,16 @@ def _set_alias( alias = expression.args.get("alias") or exp.TableAlias() if canonicalize_table_aliases: - quoted: t.Optional[bool] = True new_alias_name = next_alias_name() canonical_aliases[alias.name or target_alias or ""] = new_alias_name elif not alias.name: - quoted = None if target_alias else True new_alias_name = target_alias or next_alias_name() - if normalize: + if normalize and target_alias: new_alias_name = normalize_identifiers(new_alias_name, dialect=dialect).name else: return + quoted = True if canonicalize_table_aliases or not target_alias else None alias.set("this", exp.to_identifier(new_alias_name, quoted=quoted)) expression.set("alias", alias) @@ -123,7 +122,7 @@ def _set_alias( for name, source in scope.sources.items(): if isinstance(source, exp.Table): - # When the name is empty, it means that we have a non-table source, e.g. a pivoted Cte + # When the name is empty, it means that we have a non-table source, e.g. 
a pivoted cte is_real_table_source = bool(name) if pivot := seq_get(source.args.get("pivots") or [], 0): From d1ce89bdcafcf11038377ac2ecea01fc9456f2eb Mon Sep 17 00:00:00 2001 From: George Sittas Date: Fri, 21 Nov 2025 20:33:27 +0200 Subject: [PATCH 5/7] Move tests to qualify_tables.sql --- sqlglot/optimizer/qualify_tables.py | 4 +- tests/fixtures/optimizer/qualify_tables.sql | 39 +++++++++- tests/test_optimizer.py | 79 ++------------------- 3 files changed, 45 insertions(+), 77 deletions(-) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index 3c52fa78a7..1aaabbab86 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -131,7 +131,7 @@ def _set_alias( _set_alias(source, target_alias=name or source.name or None, normalize=True) source_fqn = ".".join(p.name for p in source.parts) - table_aliases[source_fqn] = exp.to_identifier(source.alias) + table_aliases[source_fqn] = source.args["alias"].this.copy() if pivot: target_alias = source.alias if pivot.unpivot else None @@ -179,6 +179,6 @@ def _set_alias( and (canonical_table := canonical_aliases.get(table, "")) != column.table ): # Amend existing aliases, e.g. 
t.c -> _0.c if t is aliased to _0 - column.set("table", exp.to_identifier(canonical_table)) + column.set("table", exp.to_identifier(canonical_table, quoted=True)) return expression diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql index aee131ae1f..49305fb598 100644 --- a/tests/fixtures/optimizer/qualify_tables.sql +++ b/tests/fixtures/optimizer/qualify_tables.sql @@ -213,5 +213,40 @@ WITH cte AS (SELECT 1 AS c, 'name' AS name) UPDATE t SET name = cte.name FROM ct WITH cte AS (SELECT 1 AS c, 'name' AS name) UPDATE c.db.t SET name = cte.name FROM cte WHERE cte.c = 1; # title: avoid qualifying CTE with DELETE -WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE t FROM t AS t INNER JOIN cte ON t.id = cte.c -WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE c.db.t FROM c.db.t AS t INNER JOIN cte ON t.id = cte.c \ No newline at end of file +WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE t FROM t AS t INNER JOIN cte ON t.id = cte.c; +WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE c.db.t FROM c.db.t AS t INNER JOIN cte ON t.id = cte.c; + +# title: canonicalize single table alias +# canonicalize_table_aliases: true +SELECT * FROM t; +SELECT * FROM c.db.t AS "_0"; + +# title: canonicalize join table aliases +# canonicalize_table_aliases: true +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id; +SELECT * FROM c.db.t1 AS "_0" JOIN c.db.t2 AS "_1" ON "_0".id = "_1".id; + +# title: canonicalize join with different databases +# canonicalize_table_aliases: true +SELECT * FROM db1.users JOIN db2.users ON db1.users.id = db2.users.id; +SELECT * FROM c.db1.users AS "_0" JOIN c.db2.users AS "_1" ON "_0".id = "_1".id; + +# title: canonicalize CTE alias +# canonicalize_table_aliases: true +WITH cte AS (SELECT * FROM t) SELECT * FROM cte; +WITH cte AS (SELECT * FROM c.db.t AS "_0") SELECT * FROM cte AS "_1"; + +# title: canonicalize subquery alias +# canonicalize_table_aliases: true +SELECT * FROM (SELECT * FROM t); +SELECT * FROM 
(SELECT * FROM c.db.t AS "_0") AS "_1"; + +# title: canonicalize multiple tables with subquery +# canonicalize_table_aliases: true +SELECT * FROM t1, (SELECT * FROM t2) AS sub, t3; +SELECT * FROM c.db.t1 AS "_2", (SELECT * FROM c.db.t2 AS "_0") AS "_1", c.db.t3 AS "_3"; + +# title: canonicalize CTE with PIVOT +# canonicalize_table_aliases: true +WITH cte AS (SELECT * FROM t) SELECT * FROM cte PIVOT(SUM(c) FOR v IN ('x', 'y')); +WITH cte AS (SELECT * FROM c.db.t AS "_0") SELECT * FROM cte AS "_1" PIVOT(SUM(c) FOR v IN ('x', 'y')) AS "_2"; diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 453d0e8e49..410d81545d 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -163,11 +163,14 @@ def check_file( title = meta.get("title") or f"{i}, {sql}" if only and title != only: continue + dialect = meta.get("dialect") leave_tables_isolated = meta.get("leave_tables_isolated") validate_qualify_columns = meta.get("validate_qualify_columns") + canonicalize_table_aliases = meta.get("canonicalize_table_aliases") + + func_kwargs = kwargs.copy() - func_kwargs = {**kwargs} if leave_tables_isolated is not None: func_kwargs["leave_tables_isolated"] = string_to_bool(leave_tables_isolated) @@ -175,9 +178,10 @@ def check_file( func_kwargs["validate_qualify_columns"] = string_to_bool( validate_qualify_columns ) - if dialect: func_kwargs["dialect"] = dialect + if canonicalize_table_aliases: + func_kwargs["canonicalize_table_aliases"] = canonicalize_table_aliases future = pool.submit(parse_and_optimize, func, sql, dialect, **func_kwargs) results[future] = ( @@ -240,77 +244,6 @@ def test_isolate_table_selects(self): ) def test_qualify_tables(self): - self.assertEqual( - optimizer.qualify_tables.qualify_tables( - parse_one("SELECT * FROM t"), - db="db", - catalog="c", - canonicalize_table_aliases=True, - ).sql(), - 'SELECT * FROM c.db.t AS "_0"', - ) - - self.assertEqual( - optimizer.qualify_tables.qualify_tables( - parse_one("SELECT * FROM t1 JOIN t2 ON t1.id = 
t2.id"), - db="db", - catalog="c", - canonicalize_table_aliases=True, - ).sql(), - 'SELECT * FROM c.db.t1 AS "_0" JOIN c.db.t2 AS "_1" ON _0.id = _1.id', - ) - - self.assertEqual( - optimizer.qualify_tables.qualify_tables( - parse_one("SELECT * FROM db1.users JOIN db2.users ON db1.users.id = db2.users.id"), - catalog="c", - canonicalize_table_aliases=True, - ).sql(), - 'SELECT * FROM c.db1.users AS "_0" JOIN c.db2.users AS "_1" ON _0.id = _1.id', - ) - - self.assertEqual( - optimizer.qualify_tables.qualify_tables( - parse_one("WITH cte AS (SELECT * FROM t) SELECT * FROM cte"), - db="db", - catalog="c", - canonicalize_table_aliases=True, - ).sql(), - 'WITH cte AS (SELECT * FROM c.db.t AS "_0") SELECT * FROM cte AS "_1"', - ) - - self.assertEqual( - optimizer.qualify_tables.qualify_tables( - parse_one("SELECT * FROM (SELECT * FROM t)"), - db="db", - catalog="c", - canonicalize_table_aliases=True, - ).sql(), - 'SELECT * FROM (SELECT * FROM c.db.t AS "_0") AS "_1"', - ) - - self.assertEqual( - optimizer.qualify_tables.qualify_tables( - parse_one("SELECT * FROM t1, (SELECT * FROM t2) AS sub, t3"), - db="db", - catalog="c", - canonicalize_table_aliases=True, - ).sql(), - 'SELECT * FROM c.db.t1 AS "_2", (SELECT * FROM c.db.t2 AS "_0") AS "_1", c.db.t3 AS "_3"', - ) - - self.assertEqual( - optimizer.qualify_tables.qualify_tables( - parse_one( - "WITH cte AS (SELECT * FROM t) SELECT * FROM cte PIVOT(SUM(c) FOR v IN ('x', 'y'))" - ), - db="db", - catalog="c", - canonicalize_table_aliases=True, - ).sql(), - 'WITH cte AS (SELECT * FROM c.db.t AS "_0") SELECT * FROM cte AS "_1" PIVOT(SUM(c) FOR v IN (\'x\', \'y\')) AS "_2"', - ) - self.assertEqual( optimizer.qualify.qualify( parse_one("WITH tesT AS (SELECT * FROM t1) SELECT * FROM test", "bigquery"), From ebb4aa21c95cd44ff783e59366a09e269c7cf547 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Mon, 24 Nov 2025 13:41:28 +0200 Subject: [PATCH 6/7] Explain quoted assignment reason with a comment --- 
sqlglot/optimizer/qualify_tables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index 1aaabbab86..9d6dda6e8d 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -93,7 +93,9 @@ def _set_alias( else: return + # Auto-generated aliases (_0, _1, ...) are quoted in order to be valid across all dialects quoted = True if canonicalize_table_aliases or not target_alias else None + alias.set("this", exp.to_identifier(new_alias_name, quoted=quoted)) expression.set("alias", alias) From 7fc2c6da4925fa4402c00ad5d3be2cfd54de6068 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Mon, 24 Nov 2025 14:40:24 +0200 Subject: [PATCH 7/7] Fix correlated subquery issue --- sqlglot/optimizer/qualify_tables.py | 25 +++++++++++++-------- sqlglot/optimizer/scope.py | 21 +++++++++++++---- tests/fixtures/optimizer/qualify_tables.sql | 10 +++++++++ 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index 9d6dda6e8d..47269f8539 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -73,10 +73,9 @@ def _qualify(table: exp.Table) -> None: if isinstance(node, exp.Table) and node.name not in cte_names: _qualify(node) - canonical_aliases: t.Dict[str, str] = {} - def _set_alias( expression: exp.Expression, + canonical_aliases: t.Dict[str, str], target_alias: t.Optional[str] = None, scope: t.Optional[Scope] = None, normalize: bool = False, @@ -103,6 +102,9 @@ def _set_alias( scope.rename_source(None, new_alias_name) for scope in traverse_scope(expression): + local_columns = scope.local_columns + canonical_aliases: t.Dict[str, str] = {} + for query in scope.subqueries: subquery = query.parent if isinstance(subquery, exp.Subquery): @@ -116,9 +118,9 @@ def _set_alias( derived_table.this.replace(exp.select("*").from_(unnested.copy(), copy=False))
derived_table.this.set("joins", joins) - _set_alias(derived_table, scope=scope) + _set_alias(derived_table, canonical_aliases, scope=scope) if pivot := seq_get(derived_table.args.get("pivots") or [], 0): - _set_alias(pivot) + _set_alias(pivot, canonical_aliases) table_aliases = {} @@ -130,14 +132,19 @@ def _set_alias( if pivot := seq_get(source.args.get("pivots") or [], 0): name = source.name - _set_alias(source, target_alias=name or source.name or None, normalize=True) + _set_alias( + source, + canonical_aliases, + target_alias=name or source.name or None, + normalize=True, + ) source_fqn = ".".join(p.name for p in source.parts) table_aliases[source_fqn] = source.args["alias"].this.copy() if pivot: target_alias = source.alias if pivot.unpivot else None - _set_alias(pivot, target_alias=target_alias, normalize=True) + _set_alias(pivot, canonical_aliases, target_alias=target_alias, normalize=True) # This case corresponds to a pivoted CTE, we don't want to qualify that if isinstance(scope.sources.get(source.alias_or_name), Scope): @@ -149,7 +156,7 @@ def _set_alias( if on_qualify: on_qualify(source) elif isinstance(source, Scope) and source.is_udtf: - _set_alias(udtf := source.expression) + _set_alias(udtf := source.expression, canonical_aliases) table_alias = udtf.args["alias"] @@ -162,9 +169,9 @@ def _set_alias( for table in scope.tables: if not table.alias and isinstance(table.parent, (exp.From, exp.Join)): - _set_alias(table, target_alias=table.name) + _set_alias(table, canonical_aliases, target_alias=table.name) - for column in scope.columns: + for column in local_columns: table = column.table if column.db: diff --git a/sqlglot/optimizer/scope.py b/sqlglot/optimizer/scope.py index e3754db289..85042d001b 100644 --- a/sqlglot/optimizer/scope.py +++ b/sqlglot/optimizer/scope.py @@ -98,6 +98,7 @@ def clear_cache(self): self._selected_sources = None self._columns = None self._external_columns = None + self._local_columns = None self._join_hints = None self._pivots = 
None self._references = None @@ -372,8 +373,7 @@ def external_columns(self): Columns that appear to reference sources in outer scopes. Returns: - list[exp.Column]: Column instances that don't reference - sources in the current scope. + list[exp.Column]: Column instances that don't reference sources in the current scope. """ if self._external_columns is None: if isinstance(self.expression, exp.SetOperation): @@ -383,12 +383,25 @@ def external_columns(self): self._external_columns = [ c for c in self.columns - if c.table not in self.selected_sources - and c.table not in self.semi_or_anti_join_tables + if c.table not in self.sources and c.table not in self.semi_or_anti_join_tables ] return self._external_columns + @property + def local_columns(self): + """ + Columns in this scope that are not external. + + Returns: + list[exp.Column]: Column instances that reference sources in the current scope. + """ + if self._local_columns is None: + external_columns = set(self.external_columns) + self._local_columns = [c for c in self.columns if c not in external_columns] + + return self._local_columns + @property def unqualified_columns(self): """ diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql index 49305fb598..66653574a6 100644 --- a/tests/fixtures/optimizer/qualify_tables.sql +++ b/tests/fixtures/optimizer/qualify_tables.sql @@ -250,3 +250,13 @@ SELECT * FROM c.db.t1 AS "_2", (SELECT * FROM c.db.t2 AS "_0") AS "_1", c.db.t3 # canonicalize_table_aliases: true WITH cte AS (SELECT * FROM t) SELECT * FROM cte PIVOT(SUM(c) FOR v IN ('x', 'y')); WITH cte AS (SELECT * FROM c.db.t AS "_0") SELECT * FROM cte AS "_1" PIVOT(SUM(c) FOR v IN ('x', 'y')) AS "_2"; + +# title: canonicalize sources that reference external columns +# canonicalize_table_aliases: true +SELECT * FROM x WHERE x.a = (SELECT SUM(y.c) AS c FROM y WHERE y.a = x.a LIMIT 10); +SELECT * FROM c.db.x AS "_1" WHERE "_1".a = (SELECT SUM("_0".c) AS c FROM c.db.y AS "_0" 
WHERE "_0".a = "_1".a LIMIT 10); + +# title: canonicalize sources that have colliding aliases +# canonicalize_table_aliases: true +SELECT t.foo FROM t AS t, (SELECT t.bar FROM t AS t); +SELECT "_2".foo FROM c.db.t AS "_2", (SELECT "_0".bar FROM c.db.t AS "_0") AS "_1";