diff --git a/sqlglot/optimizer/qualify.py b/sqlglot/optimizer/qualify.py index 4af67f15fe..c3aec69399 100644 --- a/sqlglot/optimizer/qualify.py +++ b/sqlglot/optimizer/qualify.py @@ -30,6 +30,7 @@ def qualify( validate_qualify_columns: bool = True, quote_identifiers: bool = True, identify: bool = True, + canonicalize_table_aliases: bool = False, on_qualify: t.Optional[t.Callable[[exp.Expression], None]] = None, ) -> exp.Expression: """ @@ -62,6 +63,8 @@ def qualify( This step is necessary to ensure correctness for case sensitive queries. But this flag is provided in case this step is performed at a later time. identify: If True, quote all identifiers, else only necessary ones. + canonicalize_table_aliases: Whether to use canonical aliases (_0, _1, ...) for all sources + instead of preserving table names. on_qualify: Callback after a table has been qualified. Returns: @@ -81,6 +84,7 @@ def qualify( catalog=catalog, dialect=dialect, on_qualify=on_qualify, + canonicalize_table_aliases=canonicalize_table_aliases, ) if isolate_tables: diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index 3e9935149a..47269f8539 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -4,7 +4,7 @@ from sqlglot import exp from sqlglot.dialects.dialect import Dialect, DialectType -from sqlglot.helper import name_sequence +from sqlglot.helper import name_sequence, seq_get from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlglot.optimizer.scope import Scope, traverse_scope @@ -18,6 +18,7 @@ def qualify_tables( catalog: t.Optional[str | exp.Identifier] = None, on_qualify: t.Optional[t.Callable[[exp.Expression], None]] = None, dialect: DialectType = None, + canonicalize_table_aliases: bool = False, ) -> E: """ Rewrite sqlglot AST to have fully qualified tables. 
Join constructs such as @@ -39,16 +40,14 @@ def qualify_tables( catalog: Catalog name on_qualify: Callback after a table has been qualified. dialect: The dialect to parse catalog and schema into. + canonicalize_table_aliases: Whether to use canonical aliases (_0, _1, ...) for all sources + instead of preserving table names. Defaults to False. Returns: The qualified expression. """ dialect = Dialect.get_or_raise(dialect) - - alias_sequence = name_sequence("_q_") - - def next_alias_name() -> str: - return normalize_identifiers(alias_sequence(), dialect=dialect).name + next_alias_name = name_sequence("_") if db := db or None: db = exp.parse_identifier(db, dialect=dialect) @@ -74,7 +73,38 @@ def _qualify(table: exp.Table) -> None: if isinstance(node, exp.Table) and node.name not in cte_names: _qualify(node) + def _set_alias( + expression: exp.Expression, + canonical_aliases: t.Dict[str, str], + target_alias: t.Optional[str] = None, + scope: t.Optional[Scope] = None, + normalize: bool = False, + ) -> None: + alias = expression.args.get("alias") or exp.TableAlias() + + if canonicalize_table_aliases: + new_alias_name = next_alias_name() + canonical_aliases[alias.name or target_alias or ""] = new_alias_name + elif not alias.name: + new_alias_name = target_alias or next_alias_name() + if normalize and target_alias: + new_alias_name = normalize_identifiers(new_alias_name, dialect=dialect).name + else: + return + + # Auto-generated aliases (_0, _1, ...) 
are quoted in order to be valid across all dialects + quoted = True if canonicalize_table_aliases or not target_alias else None + + alias.set("this", exp.to_identifier(new_alias_name, quoted=quoted)) + expression.set("alias", alias) + + if scope: + scope.rename_source(None, new_alias_name) + for scope in traverse_scope(expression): + local_columns = scope.local_columns + canonical_aliases: t.Dict[str, str] = {} + for query in scope.subqueries: subquery = query.parent if isinstance(subquery, exp.Subquery): @@ -88,61 +118,48 @@ def _qualify(table: exp.Table) -> None: derived_table.this.replace(exp.select("*").from_(unnested.copy(), copy=False)) derived_table.this.set("joins", joins) - if not derived_table.args.get("alias"): - alias = next_alias_name() - derived_table.set("alias", exp.TableAlias(this=exp.to_identifier(alias))) - scope.rename_source(None, alias) - - pivots = derived_table.args.get("pivots") - if pivots and not pivots[0].alias: - pivots[0].set("alias", exp.TableAlias(this=exp.to_identifier(next_alias_name()))) + _set_alias(derived_table, canonical_aliases, scope=scope) + if pivot := seq_get(derived_table.args.get("pivots") or [], 0): + _set_alias(pivot, canonical_aliases) table_aliases = {} for name, source in scope.sources.items(): if isinstance(source, exp.Table): - pivots = source.args.get("pivots") - if not source.alias: - # Don't add the pivot's alias to the pivoted table, use the table's name instead - if pivots and pivots[0].alias == name: - name = source.name - - # Mutates the source by attaching an alias to it - normalized_alias = normalize_identifiers( - name or source.name or alias_sequence(), dialect=dialect - ) - exp.alias_(source, normalized_alias, copy=False, table=True) - - table_aliases[".".join(p.name for p in source.parts)] = exp.to_identifier( - source.alias + # When the name is empty, it means that we have a non-table source, e.g. 
a pivoted cte + is_real_table_source = bool(name) + + if pivot := seq_get(source.args.get("pivots") or [], 0): + name = source.name + + _set_alias( + source, + canonical_aliases, + target_alias=name or source.name or None, + normalize=True, ) - if pivots: - pivot = pivots[0] - if not pivot.alias: - pivot_alias = normalize_identifiers( - source.alias if pivot.unpivot else alias_sequence(), - dialect=dialect, - ) - pivot.set("alias", exp.TableAlias(this=exp.to_identifier(pivot_alias))) + source_fqn = ".".join(p.name for p in source.parts) + table_aliases[source_fqn] = source.args["alias"].this.copy() + + if pivot: + target_alias = source.alias if pivot.unpivot else None + _set_alias(pivot, canonical_aliases, target_alias=target_alias, normalize=True) # This case corresponds to a pivoted CTE, we don't want to qualify that if isinstance(scope.sources.get(source.alias_or_name), Scope): continue - _qualify(source) + if is_real_table_source: + _qualify(source) - if on_qualify: - on_qualify(source) + if on_qualify: + on_qualify(source) elif isinstance(source, Scope) and source.is_udtf: - udtf = source.expression - table_alias = udtf.args.get("alias") or exp.TableAlias( - this=exp.to_identifier(next_alias_name()) - ) - udtf.set("alias", table_alias) + _set_alias(udtf := source.expression, canonical_aliases) + + table_alias = udtf.args["alias"] - if not table_alias.name: - table_alias.set("this", exp.to_identifier(next_alias_name())) if isinstance(udtf, exp.Values) and not table_alias.columns: column_aliases = [ normalize_identifiers(i, dialect=dialect) @@ -152,9 +169,11 @@ def _qualify(table: exp.Table) -> None: for table in scope.tables: if not table.alias and isinstance(table.parent, (exp.From, exp.Join)): - exp.alias_(table, table.name, copy=False, table=True) + _set_alias(table, canonical_aliases, target_alias=table.name) + + for column in local_columns: + table = column.table - for column in scope.columns: if column.db: table_alias = table_aliases.get(".".join(p.name 
for p in column.parts[0:-1])) @@ -163,5 +182,12 @@ def _qualify(table: exp.Table) -> None: column.set(p, None) column.set("table", table_alias.copy()) + elif ( + canonical_aliases + and table + and (canonical_table := canonical_aliases.get(table, "")) != column.table + ): + # Amend existing aliases, e.g. t.c -> _0.c if t is aliased to _0 + column.set("table", exp.to_identifier(canonical_table, quoted=True)) return expression diff --git a/sqlglot/optimizer/scope.py b/sqlglot/optimizer/scope.py index e3754db289..85042d001b 100644 --- a/sqlglot/optimizer/scope.py +++ b/sqlglot/optimizer/scope.py @@ -98,6 +98,7 @@ def clear_cache(self): self._selected_sources = None self._columns = None self._external_columns = None + self._local_columns = None self._join_hints = None self._pivots = None self._references = None @@ -372,8 +373,7 @@ def external_columns(self): Columns that appear to reference sources in outer scopes. Returns: - list[exp.Column]: Column instances that don't reference - sources in the current scope. + list[exp.Column]: Column instances that don't reference sources in the current scope. """ if self._external_columns is None: if isinstance(self.expression, exp.SetOperation): @@ -383,12 +383,25 @@ def external_columns(self): self._external_columns = [ c for c in self.columns - if c.table not in self.selected_sources - and c.table not in self.semi_or_anti_join_tables + if c.table not in self.sources and c.table not in self.semi_or_anti_join_tables ] return self._external_columns + @property + def local_columns(self): + """ + Columns in this scope that are not external. + + Returns: + list[exp.Column]: Column instances that reference sources in the current scope. 
+ """ + if self._local_columns is None: + external_columns = set(self.external_columns) + self._local_columns = [c for c in self.columns if c not in external_columns] + + return self._local_columns + @property def unqualified_columns(self): """ diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 88dff813a2..918983a788 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -33,7 +33,7 @@ FROM ( WHERE object_pointstext IS NOT NULL ); CREATE OR REPLACE TEMPORARY VIEW `latest_boo` AS -WITH `_q_1` AS ( +WITH `_1` AS ( SELECT EXPLODE_OUTER(SPLIT(`boo`.`object_pointstext`, ',')) AS `points` FROM `boo` AS `boo` @@ -41,9 +41,9 @@ WITH `_q_1` AS ( NOT `boo`.`object_pointstext` IS NULL ) SELECT - TRIM(SPLIT(`_q_1`.`points`, ':')[0]) AS `points_type`, - TRIM(SPLIT(`_q_1`.`points`, ':')[1]) AS `points_value` -FROM `_q_1` AS `_q_1`; + TRIM(SPLIT(`_1`.`points`, ':')[0]) AS `points_type`, + TRIM(SPLIT(`_1`.`points`, ':')[1]) AS `points_value` +FROM `_1` AS `_1`; # title: Union in CTE WITH cte AS ( @@ -545,52 +545,52 @@ QUALIFY # execute: false SELECT * FROM (SELECT a, b, c FROM sc.tb) PIVOT (SUM(c) FOR b IN ('x','y','z')); SELECT - "_q_1"."a" AS "a", - "_q_1"."x" AS "x", - "_q_1"."y" AS "y", - "_q_1"."z" AS "z" + "_1"."a" AS "a", + "_1"."x" AS "x", + "_1"."y" AS "y", + "_1"."z" AS "z" FROM ( SELECT "tb"."a" AS "a", "tb"."b" AS "b", "tb"."c" AS "c" FROM "sc"."tb" AS "tb" -) AS "_q_0" -PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1"; +) AS "_0" +PIVOT(SUM("_0"."c") FOR "_0"."b" IN ('x', 'y', 'z')) AS "_1"; # title: pivoted source with explicit selections where one of them is excluded & selected at the same time # note: we need to respect the exclude when selecting * from pivoted source and not include the computed column twice # execute: false SELECT * EXCEPT (x), CAST(x AS TEXT) AS x FROM (SELECT a, b, c FROM sc.tb) PIVOT (SUM(c) FOR b IN ('x','y','z')); SELECT - 
"_q_1"."a" AS "a", - "_q_1"."y" AS "y", - "_q_1"."z" AS "z", - CAST("_q_1"."x" AS TEXT) AS "x" + "_1"."a" AS "a", + "_1"."y" AS "y", + "_1"."z" AS "z", + CAST("_1"."x" AS TEXT) AS "x" FROM ( SELECT "tb"."a" AS "a", "tb"."b" AS "b", "tb"."c" AS "c" FROM "sc"."tb" AS "tb" -) AS "_q_0" -PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1"; +) AS "_0" +PIVOT(SUM("_0"."c") FOR "_0"."b" IN ('x', 'y', 'z')) AS "_1"; # title: pivoted source with implicit selections # execute: false SELECT * FROM (SELECT * FROM u) PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT - "_q_1"."g" AS "g", - "_q_1"."x" AS "x", - "_q_1"."y" AS "y" + "_1"."g" AS "g", + "_1"."x" AS "x", + "_1"."y" AS "y" FROM ( SELECT "u"."f" AS "f", "u"."g" AS "g", "u"."h" AS "h" FROM "u" AS "u" -) AS "_q_0" -PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "_q_1"; +) AS "_0" +PIVOT(SUM("_0"."f") FOR "_0"."h" IN ('x', 'y')) AS "_1"; # title: selecting explicit qualified columns from pivoted source with explicit selections # execute: false @@ -603,17 +603,17 @@ FROM ( "u"."f" AS "f", "u"."h" AS "h" FROM "u" AS "u" -) AS "_q_0" -PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "piv"; +) AS "_0" +PIVOT(SUM("_0"."f") FOR "_0"."h" IN ('x', 'y')) AS "piv"; # title: selecting explicit unqualified columns from pivoted source with implicit selections # execute: false SELECT x, y FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT - "_q_0"."x" AS "x", - "_q_0"."y" AS "y" + "_0"."x" AS "x", + "_0"."y" AS "y" FROM "u" AS "u" -PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; +PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_0"; # title: selecting all columns from a pivoted CTE source, using alias for the aggregation and generating bigquery # execute: false @@ -627,22 +627,22 @@ WITH `u_cte` AS ( FROM `u` AS `u` ) SELECT - `_q_0`.`g` AS `g`, - `_q_0`.`sum_x` AS `sum_x`, - `_q_0`.`sum_y` AS `sum_y` + `_0`.`g` AS `g`, + `_0`.`sum_x` AS `sum_x`, + `_0`.`sum_y` AS `sum_y` FROM `u_cte` AS `u_cte` 
-PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_q_0`; +PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_0`; # title: selecting all columns from a pivoted source and generating snowflake # execute: false # dialect: snowflake SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT - "_Q_0"."G" AS "G", - "_Q_0"."'x'" AS "'x'", - "_Q_0"."'y'" AS "'y'" + "_0"."G" AS "G", + "_0"."'x'" AS "'x'", + "_0"."'y'" AS "'y'" FROM "U" AS "U" -PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_Q_0"; +PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_0"; # title: selecting all columns from a pivoted source and generating spark # note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`) @@ -650,15 +650,15 @@ PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_Q_0"; # dialect: spark SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT - `_q_0`.`g` AS `g`, - `_q_0`.`x` AS `x`, - `_q_0`.`y` AS `y` + `_0`.`g` AS `g`, + `_0`.`x` AS `x`, + `_0`.`y` AS `y` FROM ( SELECT * FROM `u` AS `u` PIVOT(SUM(`u`.`f`) FOR `h` IN ('x', 'y')) -) AS `_q_0`; +) AS `_0`; # title: selecting all columns from a pivoted source, pivot has column aliases # execute: false @@ -730,10 +730,10 @@ UNPIVOT("sales" FOR "month" IN ("m_sales"."jan", "m_sales"."feb")) AS "unpiv"("a # dialect: snowflake SELECT * FROM (SELECT * FROM m_sales) AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) ORDER BY empid; SELECT - "_Q_0"."EMPID" AS "EMPID", - "_Q_0"."DEPT" AS "DEPT", - "_Q_0"."MONTH" AS "MONTH", - "_Q_0"."SALES" AS "SALES" + "_0"."EMPID" AS "EMPID", + "_0"."DEPT" AS "DEPT", + "_0"."MONTH" AS "MONTH", + "_0"."SALES" AS "SALES" FROM ( SELECT "M_SALES"."EMPID" AS "EMPID", @@ -742,9 +742,9 @@ FROM ( "M_SALES"."FEB" AS "FEB" FROM "M_SALES" AS "M_SALES" ) AS "M_SALES" -UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_Q_0" +UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_0" ORDER BY - "_Q_0"."EMPID"; + "_0"."EMPID"; # 
title: unpivoted table source with a single value column, unpivot columns can be qualified # execute: false @@ -987,7 +987,7 @@ LEFT JOIN ( # title: select * from wrapped subquery # execute: false SELECT * FROM ((SELECT * FROM tbl)); -WITH "_q_0" AS ( +WITH "_0" AS ( SELECT * FROM "tbl" AS "tbl" @@ -995,7 +995,7 @@ WITH "_q_0" AS ( SELECT * FROM ( - "_q_0" AS "_q_0" + "_0" AS "_0" ); # title: select * from wrapped subquery joined to a table (known schema) @@ -1014,7 +1014,7 @@ FROM ( # title: select * from wrapped subquery joined to a table (unknown schema) # execute: false SELECT * FROM ((SELECT c FROM t1) JOIN t2); -WITH "_q_0" AS ( +WITH "_0" AS ( SELECT "t1"."c" AS "c" FROM "t1" AS "t1" @@ -1022,7 +1022,7 @@ WITH "_q_0" AS ( SELECT * FROM ( - "_q_0" AS "_q_0" + "_0" AS "_0" CROSS JOIN "t2" AS "t2" ); @@ -1039,11 +1039,11 @@ FROM ( # title: select * from wrapped join of subqueries (unknown schema) # execute: false SELECT * FROM ((SELECT * FROM t1) JOIN (SELECT * FROM t2)); -WITH "_q_0" AS ( +WITH "_0" AS ( SELECT * FROM "t1" AS "t1" -), "_q_1" AS ( +), "_1" AS ( SELECT * FROM "t2" AS "t2" @@ -1051,8 +1051,8 @@ WITH "_q_0" AS ( SELECT * FROM ( - "_q_0" AS "_q_0" - CROSS JOIN "_q_1" AS "_q_1" + "_0" AS "_0" + CROSS JOIN "_1" AS "_1" ); # title: select * from wrapped join of subqueries (known schema) @@ -1211,17 +1211,17 @@ SELECT Name, (SELECT Name, explode(Fruits) as FruitStruct FROM fruits_table); -WITH `_q_0` AS ( +WITH `_0` AS ( SELECT `fruits_table`.`name` AS `name`, EXPLODE(`fruits_table`.`fruits`) AS `fruitstruct` FROM `fruits_table` AS `fruits_table` ) SELECT - `_q_0`.`name` AS `name`, - `_q_0`.`fruitstruct`.`$id` AS `$id`, - `_q_0`.`fruitstruct`.`value` AS `value` -FROM `_q_0` AS `_q_0`; + `_0`.`name` AS `name`, + `_0`.`fruitstruct`.`$id` AS `$id`, + `_0`.`fruitstruct`.`value` AS `value` +FROM `_0` AS `_0`; # title: mysql is case-sensitive by default # dialect: mysql @@ -1471,7 +1471,7 @@ WITH a AS (SELECT 'v' AS x) SELECT * FROM (SELECT TRANSFORM(x) 
USING 'cat' AS (y WITH `a` AS ( SELECT 'v' AS `x` -), `_q_0` AS ( +), `_0` AS ( SELECT TRANSFORM(`a`.`x`) USING 'cat' AS ( `y` STRING @@ -1479,8 +1479,8 @@ WITH `a` AS ( FROM `a` AS `a` ) SELECT - `_q_0`.`y` AS `y` -FROM `_q_0` AS `_q_0`; + `_0`.`y` AS `y` +FROM `_0` AS `_0`; # title: SELECT TRANSFORM ... Spark clause when schema is not provided # execute: false @@ -1489,15 +1489,15 @@ WITH a AS (SELECT 'v' AS x) SELECT * FROM (SELECT TRANSFORM(x) USING 'cat' FROM WITH `a` AS ( SELECT 'v' AS `x` -), `_q_0` AS ( +), `_0` AS ( SELECT TRANSFORM(`a`.`x`) USING 'cat' FROM `a` AS `a` ) SELECT - `_q_0`.`key` AS `key`, - `_q_0`.`value` AS `value` -FROM `_q_0` AS `_q_0`; + `_0`.`key` AS `key`, + `_0`.`value` AS `value` +FROM `_0` AS `_0`; # title: avoid reordering of non inner joins # execute: true diff --git a/tests/fixtures/optimizer/pushdown_projections.sql b/tests/fixtures/optimizer/pushdown_projections.sql index 03c42fb863..6921afa9f4 100644 --- a/tests/fixtures/optimizer/pushdown_projections.sql +++ b/tests/fixtures/optimizer/pushdown_projections.sql @@ -1,8 +1,8 @@ SELECT a FROM (SELECT * FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT "_0".a AS a FROM (SELECT x.a AS a FROM x AS x) AS "_0"; SELECT 1 FROM (SELECT * FROM x) WHERE b = 2; -SELECT 1 AS "1" FROM (SELECT x.b AS b FROM x AS x) AS _q_0 WHERE _q_0.b = 2; +SELECT 1 AS "1" FROM (SELECT x.b AS b FROM x AS x) AS "_0" WHERE "_0".b = 2; SELECT a, b, a from x; SELECT x.a AS a, x.b AS b, x.a AS a FROM x AS x; @@ -17,16 +17,16 @@ SELECT x1.a FROM (SELECT * FROM x) AS x1, (SELECT * FROM x) AS x2; SELECT x1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS x1, (SELECT 1 AS _ FROM x AS x) AS x2; SELECT a FROM (SELECT DISTINCT a, b FROM x); -SELECT _q_0.a AS a FROM (SELECT DISTINCT x.a AS a, x.b AS b FROM x AS x) AS _q_0; +SELECT "_0".a AS a FROM (SELECT DISTINCT x.a AS a, x.b AS b FROM x AS x) AS "_0"; SELECT a FROM (SELECT a, b FROM x UNION ALL SELECT a, b FROM x); -SELECT _q_0.a AS a FROM 
(SELECT x.a AS a FROM x AS x UNION ALL SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT "_0".a AS a FROM (SELECT x.a AS a FROM x AS x UNION ALL SELECT x.a AS a FROM x AS x) AS "_0"; WITH t1 AS (SELECT x.a AS a, x.b AS b FROM x UNION ALL SELECT z.b AS b, z.c AS c FROM z) SELECT a, b FROM t1; WITH t1 AS (SELECT x.a AS a, x.b AS b FROM x AS x UNION ALL SELECT z.b AS b, z.c AS c FROM z AS z) SELECT t1.a AS a, t1.b AS b FROM t1 AS t1; SELECT a FROM (SELECT a, b FROM x UNION SELECT a, b FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x UNION SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0; +SELECT "_0".a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x UNION SELECT x.a AS a, x.b AS b FROM x AS x) AS "_0"; WITH y AS (SELECT * FROM x) SELECT a FROM y; WITH y AS (SELECT x.a AS a FROM x AS x) SELECT y.a AS a FROM y AS y; @@ -38,10 +38,10 @@ WITH z AS (SELECT * FROM x) SELECT a FROM z UNION SELECT a FROM z; WITH z AS (SELECT x.a AS a FROM x AS x) SELECT z.a AS a FROM z AS z UNION SELECT z.a AS a FROM z AS z; SELECT b FROM (SELECT a, SUM(b) AS b FROM x GROUP BY a); -SELECT _q_0.b AS b FROM (SELECT SUM(x.b) AS b FROM x AS x GROUP BY x.a) AS _q_0; +SELECT "_0".b AS b FROM (SELECT SUM(x.b) AS b FROM x AS x GROUP BY x.a) AS "_0"; SELECT b FROM (SELECT a, SUM(b) AS b FROM x ORDER BY a); -SELECT _q_0.b AS b FROM (SELECT x.a AS a, SUM(x.b) AS b FROM x AS x ORDER BY a) AS _q_0; +SELECT "_0".b AS b FROM (SELECT x.a AS a, SUM(x.b) AS b FROM x AS x ORDER BY a) AS "_0"; SELECT x FROM (VALUES(1, 2)) AS q(x, y); SELECT q.x AS x FROM (VALUES (1, 2)) AS q(x, y); @@ -56,7 +56,7 @@ SELECT x FROM VALUES(1, 2) AS q(x, y); SELECT q.x AS x FROM (VALUES (1, 2)) AS q(x, y); SELECT i.a FROM x AS i LEFT JOIN (SELECT a, b FROM (SELECT a, b FROM x)) AS j ON i.a = j.a; -SELECT i.a AS a FROM x AS i LEFT JOIN (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) AS j ON i.a = j.a; +SELECT i.a AS a FROM x AS i LEFT JOIN (SELECT "_0".a AS a FROM (SELECT x.a AS a FROM x AS x) AS 
"_0") AS j ON i.a = j.a; WITH cte AS (SELECT source.a AS a, ROW_NUMBER() OVER (PARTITION BY source.id, source.timestamp ORDER BY source.a DESC) AS index FROM source AS source QUALIFY index) SELECT cte.a AS a FROM cte; WITH cte AS (SELECT source.a AS a FROM source AS source QUALIFY ROW_NUMBER() OVER (PARTITION BY source.id, source.timestamp ORDER BY source.a DESC)) SELECT cte.a AS a FROM cte AS cte; @@ -87,31 +87,31 @@ WITH cte AS (SELECT t.col AS col FROM t AS t) SELECT CASE WHEN 1 IN (SELECT UNNE -------------------------------------- SELECT a FROM (SELECT * FROM zz) WHERE b = 1; -SELECT _q_0.a AS a FROM (SELECT zz.a AS a, zz.b AS b FROM zz AS zz) AS _q_0 WHERE _q_0.b = 1; +SELECT "_0".a AS a FROM (SELECT zz.a AS a, zz.b AS b FROM zz AS zz) AS "_0" WHERE "_0".b = 1; SELECT a FROM (SELECT * FROM aa UNION ALL SELECT * FROM bb UNION ALL SELECT * from cc); -SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS _q_0; +SELECT "_0".a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS "_0"; SELECT a FROM (SELECT a FROM aa UNION ALL SELECT * FROM bb UNION ALL SELECT * from cc); -SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS _q_0; +SELECT "_0".a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS "_0"; SELECT a FROM (SELECT * FROM aa CROSS JOIN bb); -SELECT _q_0.a AS a FROM (SELECT a AS a FROM aa AS aa CROSS JOIN bb AS bb) AS _q_0; +SELECT "_0".a AS a FROM (SELECT a AS a FROM aa AS aa CROSS JOIN bb AS bb) AS "_0"; SELECT a FROM (SELECT aa.* FROM aa); -SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS _q_0; +SELECT "_0".a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS "_0"; SELECT a FROM (SELECT * FROM (SELECT * FROM aa)); -SELECT 
_q_1.a AS a FROM (SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS _q_0) AS _q_1; +SELECT "_1".a AS a FROM (SELECT "_0".a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS "_0") AS "_1"; with cte1 as (SELECT cola, colb FROM tb UNION ALL SELECT colc, cold FROM tb2) SELECT cola FROM cte1; WITH cte1 AS (SELECT tb.cola AS cola FROM tb AS tb UNION ALL SELECT tb2.colc AS colc FROM tb2 AS tb2) SELECT cte1.cola AS cola FROM cte1 AS cte1; SELECT * FROM ((SELECT c FROM t1) JOIN t2); -SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0, t2 AS t2); +SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS "_0", t2 AS t2); SELECT a, d FROM (SELECT 1 a, 2 c, 3 d, 4 e UNION ALL BY NAME SELECT 6 c, 7 d, 8 a, 9 e); -SELECT _q_0.a AS a, _q_0.d AS d FROM (SELECT 1 AS a, 3 AS d UNION ALL BY NAME SELECT 7 AS d, 8 AS a) AS _q_0; +SELECT "_0".a AS a, "_0".d AS d FROM (SELECT 1 AS a, 3 AS d UNION ALL BY NAME SELECT 7 AS d, 8 AS a) AS "_0"; SELECT a, b FROM (WITH cte1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d) (SELECT a, b, c FROM cte1)); -SELECT _q_0.a AS a, _q_0.b AS b FROM (WITH cte1 AS (SELECT 1 AS a, 2 AS b) SELECT cte1.a AS a, cte1.b AS b FROM cte1 AS cte1) AS _q_0; \ No newline at end of file +SELECT "_0".a AS a, "_0".b AS b FROM (WITH cte1 AS (SELECT 1 AS a, 2 AS b) SELECT cte1.a AS a, cte1.b AS b FROM cte1 AS cte1) AS "_0"; \ No newline at end of file diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql index 6676812a77..9e1a8fe922 100644 --- a/tests/fixtures/optimizer/qualify_columns.sql +++ b/tests/fixtures/optimizer/qualify_columns.sql @@ -129,7 +129,7 @@ SELECT DATE(x.a) AS _col_0, DATE(x.b) AS c FROM x AS x GROUP BY DATE(x.a), DATE( # execute: false SELECT (SELECT MIN(a) FROM UNNEST([1, 2])) AS f FROM x GROUP BY 1; -SELECT (SELECT MIN(_q_0.a) AS _col_0 FROM UNNEST(ARRAY(1, 2)) AS _q_0) AS f FROM x AS x GROUP BY 1; +SELECT (SELECT MIN(_0.a) AS _col_0 FROM UNNEST(ARRAY(1, 2)) AS _0) AS f FROM x AS x GROUP BY 1; # dialect: 
bigquery WITH x AS (select 'a' as a, 1 as b) SELECT x.a AS c, y.a as d, SUM(x.b) AS y, FROM x join x as y on x.a = y.a group by 1, 2; @@ -176,11 +176,11 @@ SELECT DATE_TRUNC(x.a, MONTH) AS a FROM x AS x; # execute: false SELECT x FROM READ_PARQUET('path.parquet', hive_partition=1); -SELECT _q_0.x AS x FROM READ_PARQUET('path.parquet', hive_partition = 1) AS _q_0; +SELECT _0.x AS x FROM READ_PARQUET('path.parquet', hive_partition = 1) AS _0; # execute: false select * from (values (1, 2)); -SELECT _q_0._col_0 AS _col_0, _q_0._col_1 AS _col_1 FROM (VALUES (1, 2)) AS _q_0(_col_0, _col_1); +SELECT _0._col_0 AS _col_0, _0._col_1 AS _col_1 FROM (VALUES (1, 2)) AS _0(_col_0, _col_1); # execute: false select * from (values (1, 2)) x; @@ -267,7 +267,7 @@ SELECT g.generate_series AS generate_series FROM generate_series(0, 10) AS g(gen # execute: false # dialect: snowflake SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (ANY ORDER BY quarter)) ORDER BY empid; -SELECT * FROM QUARTERLY_SALES AS QUARTERLY_SALES PIVOT(SUM(QUARTERLY_SALES.AMOUNT) FOR QUARTERLY_SALES.QUARTER IN (ANY ORDER BY QUARTER)) AS _Q_0 ORDER BY _Q_0.EMPID; +SELECT * FROM QUARTERLY_SALES AS QUARTERLY_SALES PIVOT(SUM(QUARTERLY_SALES.AMOUNT) FOR QUARTERLY_SALES.QUARTER IN (ANY ORDER BY QUARTER)) AS _0 ORDER BY _0.EMPID; # execute: false SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) AS x FROM t; @@ -303,10 +303,10 @@ SELECT a FROM (SELECT a FROM x AS x) y; SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y; SELECT a FROM (SELECT a AS a FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT a FROM (SELECT a FROM (SELECT a FROM x)); -SELECT _q_1.a AS a FROM (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) AS _q_1; +SELECT _1.a AS a FROM (SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0) AS _1; SELECT x.a FROM x AS x JOIN (SELECT * FROM x) AS y ON x.a = y.a; SELECT x.a AS a FROM x 
AS x JOIN (SELECT x.a AS a, x.b AS b FROM x AS x) AS y ON x.a = y.a; @@ -333,7 +333,7 @@ SELECT a FROM x UNION SELECT a FROM x UNION SELECT a FROM x ORDER BY a; SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x ORDER BY a; SELECT a FROM (SELECT a FROM x UNION SELECT a FROM x) ORDER BY a; -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x) AS _q_0 ORDER BY a; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x) AS _0 ORDER BY a; # title: nested subqueries in union ((select a from x where a < 1)) UNION ((select a from x where a > 2)); @@ -343,79 +343,79 @@ SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar UNION ALL CORRESPONDING SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz); -SELECT 
_q_0.foo AS foo, _q_0.bar AS bar, _q_0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar, _0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL CORRESPONDING SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL CORRESPONDING SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar, _q_0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar, _0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL CORRESPONDING BY (foo, bar) SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz); -SELECT _q_0.foo AS foo, _q_0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz) AS _q_0; +SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: 
false SELECT * FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) LEFT UNION ALL BY NAME ON (bar) SELECT 3 AS foo, 4 AS bar); -SELECT _q_0.bar AS bar FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) LEFT UNION ALL BY NAME ON (bar) SELECT 3 AS foo, 4 AS bar) AS _q_0; +SELECT _0.bar AS bar FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) LEFT UNION ALL BY NAME ON (bar) SELECT 3 AS foo, 4 AS bar) AS _0; # dialect: bigquery # execute: false SELECT * FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar); -SELECT _q_0.foo AS foo, _q_0.qux AS qux FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) AS _q_0; +SELECT _0.foo AS foo, _0.qux AS qux FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) AS _0; # dialect: bigquery # execute: false SELECT * FROM (((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) INNER UNION ALL BY NAME ON (foo) SELECT 6 AS foo); -SELECT _q_0.foo AS foo FROM (((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) INNER UNION ALL BY NAME ON (foo) SELECT 6 AS foo) AS _q_0; +SELECT _0.foo AS foo FROM (((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) INNER UNION ALL BY NAME ON (foo) SELECT 6 AS foo) AS _0; # Title: Nested set operations with modifiers # dialect: bigquery # execute: false WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) 
SELECT * FROM ((SELECT * FROM t1 FULL OUTER UNION ALL BY NAME (SELECT * FROM t2 FULL OUTER UNION ALL BY NAME (SELECT * FROM t3 FULL OUTER UNION ALL BY NAME SELECT * FROM t4)))); -WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT _q_0.a AS a, _q_0.b AS b, _q_0.c AS c, _q_0.d AS d, _q_0.e AS e, _q_0.f AS f FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL OUTER UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _q_0); +WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT _0.a AS a, _0.b AS b, _0.c AS c, _0.d AS d, _0.e AS e, _0.f AS f FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL OUTER UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _0); # Title: Nested set operations with different modifiers (FULL + INNER) # dialect: bigquery # execute: false WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT * FROM ((SELECT * FROM t1 FULL OUTER UNION ALL BY NAME (SELECT * FROM t2 INNER UNION ALL BY NAME (SELECT * FROM t3 FULL OUTER UNION ALL BY NAME SELECT * FROM t4)))); -WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT _q_0.a AS a, _q_0.b AS b, _q_0.c AS c FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 INNER UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _q_0); 
+WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT _0.a AS a, _0.b AS b, _0.c AS c FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 INNER UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _0); # Title: Nested set operations with different modifiers (FULL + LEFT) # dialect: bigquery # execute: false WITH t1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS d, 3 AS e) SELECT * FROM ((SELECT * FROM t1 FULL OUTER UNION ALL BY NAME (SELECT * FROM t2 FULL UNION ALL BY NAME (SELECT * FROM t3 LEFT UNION ALL BY NAME SELECT * FROM t4)))); -WITH t1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS d, 3 AS e) SELECT _q_0.a AS a, _q_0.b AS b, _q_0.c AS c, _q_0.d AS d FROM ((SELECT t1.a AS a, t1.b AS b, t1.c AS c, t1.d AS d FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 LEFT UNION ALL BY NAME SELECT t4.d AS d, t4.e AS e FROM t4 AS t4))) AS _q_0); +WITH t1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS d, 3 AS e) SELECT _0.a AS a, _0.b AS b, _0.c AS c, _0.d AS d FROM ((SELECT t1.a AS a, t1.b AS b, t1.c AS c, t1.d AS d FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 LEFT UNION ALL BY NAME SELECT t4.d AS d, t4.e AS e FROM t4 AS t4))) AS _0); -------------------------------------- -- Subqueries @@ -436,7 +436,7 @@ WITH t1(c1) AS (SELECT 1), t2(c2) AS (SELECT 2) SELECT (SELECT c1 FROM t2) FROM WITH t1 AS (SELECT 1 
AS c1), t2 AS (SELECT 2 AS c2) SELECT (SELECT t1.c1 AS c1 FROM t2 AS t2) AS _col_0 FROM t1 AS t1; SELECT a FROM (SELECT a FROM x) WHERE a IN (SELECT b FROM (SELECT b FROM y)); -SELECT _q_1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_1 WHERE _q_1.a IN (SELECT _q_0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _q_0); +SELECT _1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _1 WHERE _1.a IN (SELECT _0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _0); # dialect: mysql # execute: false @@ -485,10 +485,10 @@ SELECT x.*, y.* FROM x JOIN y ON x.b = y.b; SELECT x.a AS a, x.b AS b, y.b AS b, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; SELECT a FROM (SELECT * FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x) AS _0; SELECT * FROM (SELECT a FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT * FROM x GROUP BY 1, 2; SELECT x.a AS a, x.b AS b FROM x AS x GROUP BY x.a, x.b; @@ -565,30 +565,30 @@ SELECT ((structs.nested_0).nested_1).a_2 AS a_2, ((structs.nested_0).nested_1).n # title: CSV files are not scanned by default # execute: false SELECT * FROM READ_CSV('file.csv'); -SELECT * FROM READ_CSV('file.csv') AS _q_0; +SELECT * FROM READ_CSV('file.csv') AS _0; # dialect: clickhouse # Title: Expand tuples in VALUES using the structure provided # execute: false SELECT * FROM VALUES ('person String, place String', ('Noah', 'Paris')); -SELECT _q_0.person AS person, _q_0.place AS place FROM VALUES ('person String, place String', ('Noah', 'Paris')) AS _q_0(person, place); +SELECT _0.person AS person, _0.place AS place FROM VALUES ('person String, place String', ('Noah', 'Paris')) AS _0(person, place); # dialect: clickhouse # Title: Expand tuples in VALUES using the default naming scheme in CH # execute: false SELECT * FROM VALUES ((1, 1), (2, 2)); -SELECT _q_0.c1 AS c1, _q_0.c2 AS c2 FROM VALUES ((1, 
1), (2, 2)) AS _q_0(c1, c2); +SELECT _0.c1 AS c1, _0.c2 AS c2 FROM VALUES ((1, 1), (2, 2)) AS _0(c1, c2); # dialect: clickhouse # Title: Expand fields in VALUES using the default naming scheme in CH # execute: false SELECT * FROM VALUES (1, 2, 3); -SELECT _q_0.c1 AS c1 FROM VALUES ((1), (2), (3)) AS _q_0(c1); +SELECT _0.c1 AS c1 FROM VALUES ((1), (2), (3)) AS _0(c1); # title: Expand PIVOT column combinations # dialect: duckdb WITH cities AS (SELECT * FROM (VALUES ('nl', 'amsterdam', 2000, 1005)) AS t(country, name, year, population)) SELECT * FROM cities PIVOT(SUM(population) AS total, COUNT(population) AS count FOR country IN ('nl', 'us') year IN (2000, 2010) name IN ('amsterdam', 'seattle')); -WITH cities AS (SELECT t.country AS country, t.name AS name, t.year AS year, t.population AS population FROM (VALUES ('nl', 'amsterdam', 2000, 1005)) AS t(country, name, year, population)) SELECT _q_0.nl_2000_amsterdam_total AS nl_2000_amsterdam_total, _q_0.nl_2000_amsterdam_count AS nl_2000_amsterdam_count, _q_0.nl_2000_seattle_total AS nl_2000_seattle_total, _q_0.nl_2000_seattle_count AS nl_2000_seattle_count, _q_0.nl_2010_amsterdam_total AS nl_2010_amsterdam_total, _q_0.nl_2010_amsterdam_count AS nl_2010_amsterdam_count, _q_0.nl_2010_seattle_total AS nl_2010_seattle_total, _q_0.nl_2010_seattle_count AS nl_2010_seattle_count, _q_0.us_2000_amsterdam_total AS us_2000_amsterdam_total, _q_0.us_2000_amsterdam_count AS us_2000_amsterdam_count, _q_0.us_2000_seattle_total AS us_2000_seattle_total, _q_0.us_2000_seattle_count AS us_2000_seattle_count, _q_0.us_2010_amsterdam_total AS us_2010_amsterdam_total, _q_0.us_2010_amsterdam_count AS us_2010_amsterdam_count, _q_0.us_2010_seattle_total AS us_2010_seattle_total, _q_0.us_2010_seattle_count AS us_2010_seattle_count FROM cities AS cities PIVOT(SUM(population) AS total, COUNT(population) AS count FOR country IN ('nl', 'us') year IN (2000, 2010) name IN ('amsterdam', 'seattle')) AS _q_0; +WITH cities AS (SELECT t.country AS country, 
t.name AS name, t.year AS year, t.population AS population FROM (VALUES ('nl', 'amsterdam', 2000, 1005)) AS t(country, name, year, population)) SELECT _0.nl_2000_amsterdam_total AS nl_2000_amsterdam_total, _0.nl_2000_amsterdam_count AS nl_2000_amsterdam_count, _0.nl_2000_seattle_total AS nl_2000_seattle_total, _0.nl_2000_seattle_count AS nl_2000_seattle_count, _0.nl_2010_amsterdam_total AS nl_2010_amsterdam_total, _0.nl_2010_amsterdam_count AS nl_2010_amsterdam_count, _0.nl_2010_seattle_total AS nl_2010_seattle_total, _0.nl_2010_seattle_count AS nl_2010_seattle_count, _0.us_2000_amsterdam_total AS us_2000_amsterdam_total, _0.us_2000_amsterdam_count AS us_2000_amsterdam_count, _0.us_2000_seattle_total AS us_2000_seattle_total, _0.us_2000_seattle_count AS us_2000_seattle_count, _0.us_2010_amsterdam_total AS us_2010_amsterdam_total, _0.us_2010_amsterdam_count AS us_2010_amsterdam_count, _0.us_2010_seattle_total AS us_2010_seattle_total, _0.us_2010_seattle_count AS us_2010_seattle_count FROM cities AS cities PIVOT(SUM(population) AS total, COUNT(population) AS count FOR country IN ('nl', 'us') year IN (2000, 2010) name IN ('amsterdam', 'seattle')) AS _0; -------------------------------------- -- CTEs @@ -787,11 +787,11 @@ SELECT /*+ BROADCAST(y) */ x.b AS b FROM x AS x JOIN y AS y ON x.b = y.b; -------------------------------------- # execute: false SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; -SELECT _q_0.c AS c FROM x AS x LATERAL VIEW EXPLODE(x.a) _q_0 AS c; +SELECT _0.c AS c FROM x AS x LATERAL VIEW EXPLODE(x.a) _0 AS c; # execute: false SELECT c FROM xx LATERAL VIEW EXPLODE (a) AS c; -SELECT _q_0.c AS c FROM xx AS xx LATERAL VIEW EXPLODE(xx.a) _q_0 AS c; +SELECT _0.c AS c FROM xx AS xx LATERAL VIEW EXPLODE(xx.a) _0 AS c; # execute: false SELECT c FROM x LATERAL VIEW EXPLODE (a) t AS c; @@ -832,7 +832,7 @@ SELECT x.a AS a, i.b AS b FROM x AS x CROSS JOIN UNNEST(SPLIT(CAST(x.b AS VARCHA # execute: false SELECT c FROM (SELECT 1 a) AS x LATERAL VIEW EXPLODE(a) AS c; 
-SELECT _q_0.c AS c FROM (SELECT 1 AS a) AS x LATERAL VIEW EXPLODE(x.a) _q_0 AS c; +SELECT _0.c AS c FROM (SELECT 1 AS a) AS x LATERAL VIEW EXPLODE(x.a) _0 AS c; # execute: false SELECT * FROM foo(bar) AS t(c1, c2, c3); @@ -908,7 +908,7 @@ FROM ( i + 1 AS j FROM x ); -SELECT _q_0.i AS i, _q_0.j AS j FROM (SELECT x.a + 1 AS i, x.a + 1 + 1 AS j FROM x AS x) AS _q_0; +SELECT _0.i AS i, _0.j AS j FROM (SELECT x.a + 1 AS i, x.a + 1 + 1 AS j FROM x AS x) AS _0; # title: wrap expanded alias to ensure operator precedence isnt broken # execute: false @@ -956,7 +956,7 @@ WITH RECURSIVE rec AS (SELECT id, parent_id AS parent, 1 AS level FROM (SELECT 1 WITH RECURSIVE rec AS (SELECT t.id AS id, t.parent_id AS parent, 1 AS level FROM (SELECT 1 AS id, 0 AS parent_id) AS t WHERE t.parent_id = 0 UNION ALL SELECT s.num AS num, s.val AS x, 2 AS level FROM (SELECT 2 AS num, 1 AS val) AS s WHERE s.val = 1 UNION ALL SELECT rec.id + 10 AS id, rec.id AS parent, rec.level + 1 AS level FROM rec AS rec WHERE rec.level < 3) SELECT rec.id AS id, rec.parent AS parent, rec.level AS level FROM rec AS rec ORDER BY rec.id; WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT * FROM (SELECT c + 1 AS c FROM t WHERE c <= 3 UNION ALL SELECT c + 2 AS c FROM t WHERE c <= 3)) SELECT c FROM t ORDER BY c; -WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT _q_0.c AS c FROM (SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 3 UNION ALL SELECT t.c + 2 AS c FROM t AS t WHERE t.c <= 3) AS _q_0) SELECT t.c AS c FROM t AS t ORDER BY c; +WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT _0.c AS c FROM (SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 3 UNION ALL SELECT t.c + 2 AS c FROM t AS t WHERE t.c <= 3) AS _0) SELECT t.c AS c FROM t AS t ORDER BY c; -------------------------------------- -- Wrapped tables / join constructs @@ -974,24 +974,24 @@ SELECT * FROM ((a AS a CROSS JOIN ((b AS b CROSS JOIN c AS c) CROSS JOIN (d AS d # execute: false SELECT * FROM ((SELECT * FROM tbl)); -SELECT * FROM ((SELECT * 
FROM tbl AS tbl) AS _q_0); +SELECT * FROM ((SELECT * FROM tbl AS tbl) AS _0); # execute: false SELECT * FROM ((SELECT c FROM t1) CROSS JOIN t2); -SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0 CROSS JOIN t2 AS t2); +SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _0 CROSS JOIN t2 AS t2); # execute: false SELECT * FROM ((SELECT * FROM x) INNER JOIN y ON a = c); -SELECT y.b AS b, y.c AS c, _q_0.a AS a, _q_0.b AS b FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0 INNER JOIN y AS y ON _q_0.a = y.c); +SELECT y.b AS b, y.c AS c, _0.a AS a, _0.b AS b FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _0 INNER JOIN y AS y ON _0.a = y.c); SELECT x.a, y.b, z.c FROM x LEFT JOIN (y INNER JOIN z ON y.c = z.c) ON x.b = y.b; SELECT x.a AS a, y.b AS b, z.c AS c FROM x AS x LEFT JOIN (y AS y INNER JOIN z AS z ON y.c = z.c) ON x.b = y.b; SELECT * FROM ((SELECT * FROM x) INNER JOIN (SELECT * FROM y) ON a = c); -SELECT _q_0.a AS a, _q_0.b AS b, _q_1.b AS b, _q_1.c AS c FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0 INNER JOIN (SELECT y.b AS b, y.c AS c FROM y AS y) AS _q_1 ON _q_0.a = _q_1.c); +SELECT _0.a AS a, _0.b AS b, _1.b AS b, _1.c AS c FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _0 INNER JOIN (SELECT y.b AS b, y.c AS c FROM y AS y) AS _1 ON _0.a = _1.c); SELECT b FROM ((SELECT a FROM x) INNER JOIN y ON a = b); -SELECT y.b AS b FROM ((SELECT x.a AS a FROM x AS x) AS _q_0 INNER JOIN y AS y ON _q_0.a = y.b); +SELECT y.b AS b FROM ((SELECT x.a AS a FROM x AS x) AS _0 INNER JOIN y AS y ON _0.a = y.b); SELECT a, c FROM x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y TABLESAMPLE SYSTEM (10 ROWS); SELECT x.a AS a, y.c AS c FROM x AS x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y AS y TABLESAMPLE SYSTEM (10 ROWS); diff --git a/tests/fixtures/optimizer/qualify_columns__with_invisible.sql b/tests/fixtures/optimizer/qualify_columns__with_invisible.sql index 1e070158e2..a65b1834bb 100644 --- a/tests/fixtures/optimizer/qualify_columns__with_invisible.sql +++ 
b/tests/fixtures/optimizer/qualify_columns__with_invisible.sql @@ -11,10 +11,10 @@ SELECT x.b AS b FROM x AS x; -- Derived tables -------------------------------------- SELECT x.a FROM x AS x CROSS JOIN (SELECT * FROM x); -SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT x.a AS a FROM x AS x) AS _0; SELECT x.b FROM x AS x CROSS JOIN (SELECT b FROM x); -SELECT x.b AS b FROM x AS x CROSS JOIN (SELECT x.b AS b FROM x AS x) AS _q_0; +SELECT x.b AS b FROM x AS x CROSS JOIN (SELECT x.b AS b FROM x AS x) AS _0; -------------------------------------- -- Expand * @@ -29,7 +29,7 @@ SELECT * FROM y CROSS JOIN z ON y.c = z.c; SELECT y.b AS b, z.b AS b FROM y AS y CROSS JOIN z AS z ON y.c = z.c; SELECT a FROM (SELECT * FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT * FROM (SELECT a FROM x); -SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; diff --git a/tests/fixtures/optimizer/qualify_columns_ddl.sql b/tests/fixtures/optimizer/qualify_columns_ddl.sql index 75d84ca2ff..7b38b8eb86 100644 --- a/tests/fixtures/optimizer/qualify_columns_ddl.sql +++ b/tests/fixtures/optimizer/qualify_columns_ddl.sql @@ -12,7 +12,7 @@ CREATE TABLE foo AS SELECT tbl.a AS a FROM tbl AS tbl; # title: Create with complex CTE with derived table WITH cte AS (SELECT a FROM (SELECT a FROM x)) CREATE TABLE s AS SELECT * FROM cte; -WITH cte AS (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) CREATE TABLE s AS SELECT cte.a AS a FROM cte AS cte; +WITH cte AS (SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0) CREATE TABLE s AS SELECT cte.a AS a FROM cte AS cte; # title: Create wtih multiple CTEs WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte2; @@ -24,11 +24,11 @@ WITH cte1 AS (SELECT y.b AS b FROM y AS 
y), cte2 AS (SELECT cte1.b AS b FROM cte # title: Create with multiple derived tables CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM y)); -CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _q_0) AS _q_1; +CREATE TABLE s AS SELECT _1.b AS b FROM (SELECT _0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _0) AS _1; # title: Create with a CTE and a derived table WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM cte)); -WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT cte.b AS b FROM cte AS cte) AS _q_0) AS _q_1; +WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT _1.b AS b FROM (SELECT _0.b AS b FROM (SELECT cte.b AS b FROM cte AS cte) AS _0) AS _1; # title: Insert with CTE # dialect: spark diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql index 662f7c37c6..66653574a6 100644 --- a/tests/fixtures/optimizer/qualify_tables.sql +++ b/tests/fixtures/optimizer/qualify_tables.sql @@ -68,7 +68,7 @@ SELECT (SELECT y.c FROM c.db.y AS y) FROM c.db.x AS x; # title: pivoted table SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b')); -SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS _q_0; +SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS "_0"; # title: pivoted table, pivot has alias SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b')) AS piv; @@ -135,28 +135,28 @@ SELECT t.a FROM (tbl AS tbl) AS t; SELECT t.a FROM (SELECT * FROM c.db.tbl AS tbl) AS t; # title: wrapped aliased table with outer alias -SELECT * FROM ((((tbl AS tbl)))) AS _q_0; -SELECT * FROM (SELECT * FROM c.db.tbl AS tbl) AS _q_0; +SELECT * FROM ((((tbl AS tbl)))) AS "_0"; +SELECT * FROM (SELECT * FROM c.db.tbl AS tbl) AS "_0"; # title: join construct with three tables -SELECT * FROM (tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = 
id3) AS _q_0; -SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = id3) AS _q_0; +SELECT * FROM (tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = id3) AS "_0"; +SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = id3) AS "_0"; # title: join construct with three tables and redundant set of parentheses -SELECT * FROM ((tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = id3)) AS _q_0; -SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = id3) AS _q_0; +SELECT * FROM ((tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = id3)) AS "_0"; +SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = id3) AS "_0"; # title: join construct within join construct -SELECT * FROM (tbl1 AS tbl1 JOIN (tbl2 AS tbl2 JOIN tbl3 AS tbl3 ON id2 = id3) AS _q_0 ON id1 = id3) AS _q_1; -SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN (SELECT * FROM c.db.tbl2 AS tbl2 JOIN c.db.tbl3 AS tbl3 ON id2 = id3) AS _q_0 ON id1 = id3) AS _q_1; +SELECT * FROM (tbl1 AS tbl1 JOIN (tbl2 AS tbl2 JOIN tbl3 AS tbl3 ON id2 = id3) AS "_0" ON id1 = id3) AS "_1"; +SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN (SELECT * FROM c.db.tbl2 AS tbl2 JOIN c.db.tbl3 AS tbl3 ON id2 = id3) AS "_0" ON id1 = id3) AS "_1"; # title: wrapped subquery without alias SELECT * FROM ((SELECT * FROM t)); -SELECT * FROM ((SELECT * FROM c.db.t AS t) AS _q_0); +SELECT * FROM ((SELECT * FROM c.db.t AS t) AS "_0"); # title: wrapped subquery without alias joined with a table SELECT * FROM ((SELECT * FROM t1) INNER JOIN t2 ON a = b); -SELECT * FROM ((SELECT * FROM c.db.t1 AS t1) AS _q_0 INNER JOIN c.db.t2 AS t2 ON a = b); +SELECT * FROM ((SELECT * FROM c.db.t1 AS t1) AS "_0" INNER JOIN c.db.t2 AS t2 ON a = b); # title: lateral unnest 
with alias SELECT x FROM t, LATERAL UNNEST(t.xs) AS x; @@ -164,7 +164,7 @@ SELECT x FROM c.db.t AS t, LATERAL UNNEST(t.xs) AS x; # title: lateral unnest without alias SELECT x FROM t, LATERAL UNNEST(t.xs); -SELECT x FROM c.db.t AS t, LATERAL UNNEST(t.xs) AS _q_0; +SELECT x FROM c.db.t AS t, LATERAL UNNEST(t.xs) AS "_0"; # title: table with ordinality SELECT * FROM t CROSS JOIN JSON_ARRAY_ELEMENTS(t.response) WITH ORDINALITY AS kv_json; @@ -213,5 +213,50 @@ WITH cte AS (SELECT 1 AS c, 'name' AS name) UPDATE t SET name = cte.name FROM ct WITH cte AS (SELECT 1 AS c, 'name' AS name) UPDATE c.db.t SET name = cte.name FROM cte WHERE cte.c = 1; # title: avoid qualifying CTE with DELETE -WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE t FROM t AS t INNER JOIN cte ON t.id = cte.c -WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE c.db.t FROM c.db.t AS t INNER JOIN cte ON t.id = cte.c \ No newline at end of file +WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE t FROM t AS t INNER JOIN cte ON t.id = cte.c; +WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE c.db.t FROM c.db.t AS t INNER JOIN cte ON t.id = cte.c; + +# title: canonicalize single table alias +# canonicalize_table_aliases: true +SELECT * FROM t; +SELECT * FROM c.db.t AS "_0"; + +# title: canonicalize join table aliases +# canonicalize_table_aliases: true +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id; +SELECT * FROM c.db.t1 AS "_0" JOIN c.db.t2 AS "_1" ON "_0".id = "_1".id; + +# title: canonicalize join with different databases +# canonicalize_table_aliases: true +SELECT * FROM db1.users JOIN db2.users ON db1.users.id = db2.users.id; +SELECT * FROM c.db1.users AS "_0" JOIN c.db2.users AS "_1" ON "_0".id = "_1".id; + +# title: canonicalize CTE alias +# canonicalize_table_aliases: true +WITH cte AS (SELECT * FROM t) SELECT * FROM cte; +WITH cte AS (SELECT * FROM c.db.t AS "_0") SELECT * FROM cte AS "_1"; + +# title: canonicalize subquery alias +# canonicalize_table_aliases: true +SELECT * FROM (SELECT * FROM t); 
+SELECT * FROM (SELECT * FROM c.db.t AS "_0") AS "_1"; + +# title: canonicalize multiple tables with subquery +# canonicalize_table_aliases: true +SELECT * FROM t1, (SELECT * FROM t2) AS sub, t3; +SELECT * FROM c.db.t1 AS "_2", (SELECT * FROM c.db.t2 AS "_0") AS "_1", c.db.t3 AS "_3"; + +# title: canonicalize CTE with PIVOT +# canonicalize_table_aliases: true +WITH cte AS (SELECT * FROM t) SELECT * FROM cte PIVOT(SUM(c) FOR v IN ('x', 'y')); +WITH cte AS (SELECT * FROM c.db.t AS "_0") SELECT * FROM cte AS "_1" PIVOT(SUM(c) FOR v IN ('x', 'y')) AS "_2"; + +# title: canonicalize sources that reference external columns +# canonicalize_table_aliases: true +SELECT * FROM x WHERE x.a = (SELECT SUM(y.c) AS c FROM y WHERE y.a = x.a LIMIT 10); +SELECT * FROM c.db.x AS "_1" WHERE "_1".a = (SELECT SUM("_0".c) AS c FROM c.db.y AS "_0" WHERE "_0".a = "_1".a LIMIT 10); + +# title: canonicalize sources that have colliding aliases +# canonicalize_table_aliases: true +SELECT t.foo FROM t AS t, (SELECT t.bar FROM t AS t); +SELECT "_2".foo FROM c.db.t AS "_2", (SELECT "_0".bar FROM c.db.t AS "_0") AS "_1"; diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql index 04648cfadd..d14ad46830 100644 --- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql +++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql @@ -2539,7 +2539,7 @@ WITH "item_2" AS ( "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id" FROM "item" AS "item" -), "_q_0" AS ( +), "_0" AS ( SELECT "iss"."i_brand_id" AS "brand_id", "iss"."i_class_id" AS "class_id", @@ -2619,10 +2619,10 @@ WITH "item_2" AS ( SELECT "item"."i_item_sk" AS "ss_item_sk" FROM "item_2" AS "item" - JOIN "_q_0" AS "_q_0" - ON "_q_0"."brand_id" = "item"."i_brand_id" - AND "_q_0"."category_id" = "item"."i_category_id" - AND "_q_0"."class_id" = "item"."i_class_id" + JOIN "_0" AS "_0" + ON "_0"."brand_id" = "item"."i_brand_id" + AND "_0"."category_id" = "item"."i_category_id" + AND "_0"."class_id" = 
"item"."i_class_id" GROUP BY "item"."i_item_sk" ), "_u_1" AS ( @@ -3394,7 +3394,7 @@ WITH "frequent_ss_items" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk" FROM "customer" AS "customer" -), "_q_0" AS ( +), "_0" AS ( SELECT SUM("store_sales"."ss_quantity" * "store_sales"."ss_sales_price") AS "csales" FROM "store_sales" AS "store_sales" @@ -3407,8 +3407,8 @@ WITH "frequent_ss_items" AS ( "customer"."c_customer_sk" ), "max_store_sales" AS ( SELECT - MAX("_q_0"."csales") AS "tpcds_cmax" - FROM "_q_0" AS "_q_0" + MAX("_0"."csales") AS "tpcds_cmax" + FROM "_0" AS "_0" ), "best_ss_customer" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk" @@ -3440,7 +3440,7 @@ WITH "frequent_ss_items" AS ( FROM "best_ss_customer" AS "best_ss_customer" GROUP BY "best_ss_customer"."c_customer_sk" -), "_q_1" AS ( +), "_1" AS ( SELECT "catalog_sales"."cs_quantity" * "catalog_sales"."cs_list_price" AS "sales" FROM "catalog_sales" AS "catalog_sales" @@ -3466,8 +3466,8 @@ WITH "frequent_ss_items" AS ( NOT "_u_3"."item_sk" IS NULL AND NOT "_u_4"."c_customer_sk" IS NULL ) SELECT - SUM("_q_1"."sales") AS "_col_0" -FROM "_q_1" AS "_q_1" + SUM("_1"."sales") AS "_col_0" +FROM "_1" AS "_1" LIMIT 100; -------------------------------------- diff --git a/tests/test_lineage.py b/tests/test_lineage.py index cad7704fb4..f7e43ecc6f 100644 --- a/tests/test_lineage.py +++ b/tests/test_lineage.py @@ -413,8 +413,8 @@ def test_subquery(self) -> None: self.assertEqual(node.name, "a") self.assertEqual(len(node.downstream), 1) node = node.downstream[0] - self.assertEqual(node.name, "_q_0.a") - self.assertEqual(node.reference_node_name, "_q_0") + self.assertEqual(node.name, "_0.a") + self.assertEqual(node.reference_node_name, "_0") def test_lineage_cte_union(self) -> None: query = """ @@ -480,7 +480,7 @@ def test_select_star(self) -> None: self.assertEqual(node.name, "x") downstream = node.downstream[0] - self.assertEqual(downstream.name, "_q_0.x") + self.assertEqual(downstream.name, "_0.x") 
self.assertEqual(downstream.source.sql(), "SELECT * FROM table_a AS table_a") downstream = downstream.downstream[0] @@ -565,7 +565,7 @@ def test_pivot_without_alias(self) -> None: """ node = lineage("other_a", sql) - self.assertEqual(node.downstream[0].name, "_q_0.value") + self.assertEqual(node.downstream[0].name, "_0.value") self.assertEqual(node.downstream[0].downstream[0].name, "sample_data.value") def test_pivot_with_alias(self) -> None: @@ -705,7 +705,7 @@ def test_pivot_with_subquery(self) -> None: node = lineage("product_type", sql, dialect="duckdb", schema=schema) self.assertEqual(node.downstream[0].name, "cte.product_type") - self.assertEqual(node.downstream[0].downstream[0].name, "_q_0.product_type") + self.assertEqual(node.downstream[0].downstream[0].name, "_0.product_type") self.assertEqual( node.downstream[0].downstream[0].downstream[0].name, "loan_ledger.product_type", @@ -713,7 +713,7 @@ def test_pivot_with_subquery(self) -> None: node = lineage('"2024-10"', sql, dialect="duckdb", schema=schema) self.assertEqual(node.downstream[0].name, "cte.2024-10") - self.assertEqual(node.downstream[0].downstream[0].name, "_q_0.loan_id") + self.assertEqual(node.downstream[0].downstream[0].name, "_0.loan_id") self.assertEqual(node.downstream[0].downstream[0].downstream[0].name, "loan_ledger.loan_id") def test_copy_flag(self) -> None: diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index da525b8245..410d81545d 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -163,11 +163,14 @@ def check_file( title = meta.get("title") or f"{i}, {sql}" if only and title != only: continue + dialect = meta.get("dialect") leave_tables_isolated = meta.get("leave_tables_isolated") validate_qualify_columns = meta.get("validate_qualify_columns") + canonicalize_table_aliases = meta.get("canonicalize_table_aliases") + + func_kwargs = kwargs.copy() - func_kwargs = {**kwargs} if leave_tables_isolated is not None: func_kwargs["leave_tables_isolated"] = 
string_to_bool(leave_tables_isolated) @@ -175,9 +178,10 @@ def check_file( func_kwargs["validate_qualify_columns"] = string_to_bool( validate_qualify_columns ) - if dialect: func_kwargs["dialect"] = dialect + if canonicalize_table_aliases: + func_kwargs["canonicalize_table_aliases"] = canonicalize_table_aliases future = pool.submit(parse_and_optimize, func, sql, dialect, **func_kwargs) results[future] = ( @@ -259,7 +263,7 @@ def test_qualify_tables(self): db="db", catalog="catalog", ).sql(), - "WITH cte AS (SELECT * FROM catalog.db.t AS t) SELECT * FROM cte AS cte PIVOT(SUM(c) FOR v IN ('x', 'y')) AS _q_0", + "WITH cte AS (SELECT * FROM catalog.db.t AS t) SELECT * FROM cte AS cte PIVOT(SUM(c) FOR v IN ('x', 'y')) AS \"_0\"", ) self.assertEqual( @@ -434,7 +438,7 @@ def test_qualify_columns(self, logger): self.assertEqual( optimizer.qualify_columns.qualify_columns( parse_one( - "SELECT id, dt, v FROM (SELECT t1.id, t1.dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp USING (id) LEFT JOIN t2 AS t2 USING (other_id, dt, common) WHERE t1.id > 10 GROUP BY 1, 2) AS _q_0", + "SELECT id, dt, v FROM (SELECT t1.id, t1.dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp USING (id) LEFT JOIN t2 AS t2 USING (other_id, dt, common) WHERE t1.id > 10 GROUP BY 1, 2) AS `_0`", dialect="bigquery", ), schema=MappingSchema( @@ -446,7 +450,7 @@ def test_qualify_columns(self, logger): dialect="bigquery", ), ).sql(dialect="bigquery"), - "SELECT _q_0.id AS id, _q_0.dt AS dt, _q_0.v AS v FROM (SELECT t1.id AS id, t1.dt AS dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp ON t1.id = lkp.id LEFT JOIN t2 AS t2 ON lkp.other_id = t2.other_id AND t1.dt = t2.dt AND COALESCE(t1.common, lkp.common) = t2.common WHERE t1.id > 10 GROUP BY t1.id, t1.dt) AS _q_0", + "SELECT `_0`.id AS id, `_0`.dt AS dt, `_0`.v AS v FROM (SELECT t1.id AS id, t1.dt AS dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp ON t1.id = lkp.id LEFT JOIN t2 AS t2 ON 
lkp.other_id = t2.other_id AND t1.dt = t2.dt AND COALESCE(t1.common, lkp.common) = t2.common WHERE t1.id > 10 GROUP BY t1.id, t1.dt) AS `_0`", ) # Detection of correlation where columns are referenced in derived tables nested within subqueries