tobymao · tobymao · Nov 26, 2025 · Nov 24, 2025 · Nov 24, 2025 · Nov 24, 2025
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
@@ -353,10 +353,15 @@ class BigQuery(Dialect):
     LOG_BASE_FIRST = False
     HEX_LOWERCASE = True
     FORCE_EARLY_ALIAS_REF_EXPANSION = True
+    EXPAND_ONLY_GROUP_ALIAS_REF = True
     PRESERVE_ORIGINAL_NAMES = True
     HEX_STRING_IS_INTEGER_TYPE = True
     BYTE_STRING_IS_BYTES_TYPE = True
     UUID_IS_STRING_TYPE = True
+    PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True
+    TABLES_REFERENCEABLE_AS_COLUMNS = True
+    SUPPORTS_STRUCT_STAR_EXPANSION = True
+    QUERY_RESULTS_ARE_STRUCTS = True
 
     # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
     INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'

diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
@@ -457,14 +457,126 @@ class Dialect(metaclass=_Dialect):
         to "WHERE id = 1 GROUP BY id HAVING id = 1"
     """
 
-    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False
+    EXPAND_ONLY_GROUP_ALIAS_REF = False
     """Whether alias reference expansion before qualification should only happen for the GROUP BY clause."""
 
+    DISABLES_ALIAS_REF_EXPANSION = False
+    """
+    Whether alias reference expansion is disabled for this dialect.
+
+    Some dialects like Oracle do NOT support referencing aliases in projections or WHERE clauses.
+    The original expression must be repeated instead.
+
+    For example, in Oracle:
+        SELECT y.foo AS bar, bar * 2 AS baz FROM y  -- INVALID
+        SELECT y.foo AS bar, y.foo * 2 AS baz FROM y  -- VALID
+    """
+
+    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = False
+    """
+    Whether alias references are allowed in JOIN ... ON clauses.
+
+    Most dialects do not support this, but Snowflake allows alias expansion in the JOIN ... ON
+    clause (and almost everywhere else).
+    Reference: https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
+
+    For example, in Snowflake:
+        SELECT a.id AS user_id FROM a JOIN b ON user_id = b.id  -- VALID
+    """
+
     SUPPORTS_ORDER_BY_ALL = False
     """
     Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
     """
 
+    PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = False
+    """
+    Whether projection alias names can shadow table/source names in GROUP BY and HAVING clauses.
+
+    In BigQuery, when a projection alias has the same name as a source table, the alias takes
+    precedence in GROUP BY and HAVING clauses, and the table becomes inaccessible by that name.
+
+    For example, in BigQuery:
+        SELECT id, ARRAY_AGG(col) AS custom_fields
+        FROM custom_fields
+        GROUP BY id
+        HAVING id >= 1
+
+    The "custom_fields" source is shadowed by the projection alias, so we cannot qualify "id"
+    with "custom_fields" in GROUP BY/HAVING.
+    """
+
+    TABLES_REFERENCEABLE_AS_COLUMNS = False
+    """
+    Whether table names can be referenced as columns (treated as structs).
+
+    BigQuery allows tables to be referenced as columns in queries, automatically treating
+    them as struct values containing all the table's columns.
+
+    For example, in BigQuery:
+        SELECT t FROM my_table AS t  -- Returns entire row as a struct
+    """
+
+    SUPPORTS_STRUCT_STAR_EXPANSION = False
+    """
+    Whether the dialect supports expanding struct fields using star notation (e.g., struct_col.*).
+
+    BigQuery allows struct fields to be expanded with the star operator:
+        SELECT t.struct_col.* FROM table t
+    RisingWave also allows struct field expansion with the star operator using parentheses:
+        SELECT (t.struct_col).* FROM table t
+
+    This expands to all fields within the struct.
+    """
+
+    QUERY_RESULTS_ARE_STRUCTS = False
+    """
+    Whether query results are internally represented as struct types for type inference.
+
+    In BigQuery, subqueries used as data sources are internally represented as
+    structs, enabling advanced type inference. For example:
+    - ARRAY(SELECT 'foo') unwraps to ARRAY<STRING>, not ARRAY<STRUCT<STRING>>
+    - Column types propagate correctly through subqueries
+
+    This does NOT mean subquery results can be accessed with dot notation.
+    For field access, use SELECT AS STRUCT explicitly:
+        SELECT (SELECT AS STRUCT 1 AS x, 2 AS y).x  -- Valid
+        SELECT (SELECT 1 AS x, 2 AS y).x            -- Invalid
+    """
+
+    REQUIRES_PARENTHESIZED_STRUCT_ACCESS = False
+    """
+    Whether struct field access requires parentheses around the expression.
+
+    RisingWave requires parentheses for struct field access in certain contexts:
+        SELECT (col.field).subfield FROM table  -- Parentheses required
+
+    Without parentheses, the parser may not correctly interpret nested struct access.
+
+    Reference: https://docs.risingwave.com/sql/data-types/struct#retrieve-data-in-a-struct
+    """
+
+    SUPPORTS_NULL_TYPE = False
+    """
+    Whether NULL/VOID is supported as a valid data type (not just a value).
+
+    Databricks and Spark v3+ support NULL as an actual type, allowing expressions like:
+        SELECT NULL AS col  -- Has type NULL, not just value NULL
+        CAST(x AS VOID)     -- Valid type cast
+    """
+
+    COALESCE_COMPARISON_NON_STANDARD = False
+    """
+    Whether COALESCE in comparisons has non-standard NULL semantics.
+
+    We can't convert `COALESCE(x, 1) = 2` into `NOT x IS NULL AND x = 2` for Redshift,
+    because they are not always equivalent. For example, if `x` is `NULL` and it comes
+    from a table, then the result is `NULL`, despite `FALSE AND NULL` evaluating to `FALSE`.
+
+    In standard SQL and most dialects, these expressions are equivalent, but Redshift treats
+    table NULLs differently in this context.
+    """
+
     HAS_DISTINCT_ARRAY_CONSTRUCTORS = False
     """
     Whether the ARRAY constructor is context-sensitive, i.e in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3)

diff --git a/sqlglot/dialects/oracle.py b/sqlglot/dialects/oracle.py
@@ -45,6 +45,7 @@ class Oracle(Dialect):
     NULL_ORDERING = "nulls_are_large"
     ON_CONDITION_EMPTY_BEFORE_ERROR = False
     ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
+    DISABLES_ALIAS_REF_EXPANSION = True
 
     # See section 8: https://docs.oracle.com/cd/A97630_01/server.920/a96540/sql_elements9a.htm
     NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE

diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py
@@ -47,6 +47,7 @@ class Redshift(Postgres):
     COPY_PARAMS_ARE_CSV = False
     HEX_LOWERCASE = True
     HAS_DISTINCT_ARRAY_CONSTRUCTORS = True
+    COALESCE_COMPARISON_NON_STANDARD = True
 
     # ref: https://docs.aws.amazon.com/redshift/latest/dg/r_FORMAT_strings.html
     TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"

diff --git a/sqlglot/dialects/risingwave.py b/sqlglot/dialects/risingwave.py
@@ -8,6 +8,9 @@
 
 
 class RisingWave(Postgres):
+    REQUIRES_PARENTHESIZED_STRUCT_ACCESS = True
+    SUPPORTS_STRUCT_STAR_EXPANSION = True
+
     class Tokenizer(Postgres.Tokenizer):
         KEYWORDS = {
             **Postgres.Tokenizer.KEYWORDS,

diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py
@@ -553,6 +553,7 @@ class Snowflake(Dialect):
     ARRAY_AGG_INCLUDES_NULLS = None
     ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
     TRY_CAST_REQUIRES_STRING = True
+    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True
 
     EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
 

diff --git a/sqlglot/dialects/spark.py b/sqlglot/dialects/spark.py
@@ -112,6 +112,7 @@ def _groupconcat_sql(self: Spark.Generator, expression: exp.GroupConcat) -> str:
 
 class Spark(Spark2):
     SUPPORTS_ORDER_BY_ALL = True
+    SUPPORTS_NULL_TYPE = True
 
     class Tokenizer(Spark2.Tokenizer):
         STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS = False

diff --git a/sqlglot/optimizer/annotate_types.py b/sqlglot/optimizer/annotate_types.py
@@ -14,7 +14,6 @@
 )
 from sqlglot.optimizer.scope import Scope, traverse_scope
 from sqlglot.schema import MappingSchema, Schema, ensure_schema
-from sqlglot.dialects.dialect import Dialect
 
 if t.TYPE_CHECKING:
     from sqlglot._typing import B, E
@@ -188,13 +187,9 @@ def __init__(
         overwrite_types: bool = True,
     ) -> None:
         self.schema = schema
-        self.dialect = Dialect.get_or_raise(schema.dialect)
-        self.expression_metadata = (
-            expression_metadata or Dialect.get_or_raise(schema.dialect).EXPRESSION_METADATA
-        )
-        self.coerces_to = (
-            coerces_to or Dialect.get_or_raise(schema.dialect).COERCES_TO or self.COERCES_TO
-        )
+        self.dialect = schema.dialect
+        self.expression_metadata = expression_metadata or schema.dialect.EXPRESSION_METADATA
+        self.coerces_to = coerces_to or schema.dialect.COERCES_TO or self.COERCES_TO
         self.binary_coercions = binary_coercions or self.BINARY_COERCIONS
 
         # Caches the ids of annotated sub-Expressions, to ensure we only visit them once
@@ -204,7 +199,7 @@ def __init__(
         self._null_expressions: t.Dict[int, exp.Expression] = {}
 
         # Databricks and Spark ≥v3 actually support NULL (i.e., VOID) as a type
-        self._supports_null_type = schema.dialect in ("databricks", "spark")
+        self._supports_null_type = schema.dialect.SUPPORTS_NULL_TYPE
 
         # Maps an exp.SetOperation's id (e.g. UNION) to its projection types. This is computed if the
         # exp.SetOperation is the expression of a scope source, as selecting from it multiple times
@@ -368,7 +363,9 @@ def annotate_scope(self, scope: Scope) -> None:
         # Iterate through all the expressions of the current scope in post-order, and annotate
         self._annotate_expression(scope.expression, scope, selects)
 
-        if self.schema.dialect == "bigquery" and isinstance(scope.expression, exp.Query):
+        if self.schema.dialect.QUERY_RESULTS_ARE_STRUCTS and isinstance(
+            scope.expression, exp.Query
+        ):
             struct_type = exp.DataType(
                 this=exp.DataType.Type.STRUCT,
                 expressions=[

diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py
@@ -54,9 +54,8 @@ def qualify_columns(
     schema = ensure_schema(schema, dialect=dialect)
     annotator = TypeAnnotator(schema)
     infer_schema = schema.empty if infer_schema is None else infer_schema
-    dialect = Dialect.get_or_raise(schema.dialect)
+    dialect = schema.dialect
     pseudocolumns = dialect.PSEUDOCOLUMNS
-    bigquery = dialect == "bigquery"
 
     for scope in traverse_scope(expression):
         if dialect.PREFER_CTE_ALIAS_COLUMN:
@@ -77,7 +76,7 @@ def qualify_columns(
                 scope,
                 resolver,
                 dialect,
-                expand_only_groupby=bigquery,
+                expand_only_groupby=dialect.EXPAND_ONLY_GROUP_ALIAS_REF,
             )
 
         _convert_columns_to_dots(scope, resolver)
@@ -107,7 +106,7 @@ def qualify_columns(
         # https://www.postgresql.org/docs/current/sql-select.html#SQL-DISTINCT
         _expand_order_by_and_distinct_on(scope, resolver)
 
-        if bigquery:
+        if dialect.SUPPORTS_STRUCT_STAR_EXPANSION:
             annotator.annotate_scope(scope)
 
     return expression
@@ -303,12 +302,11 @@ def _expand_alias_refs(
     """
     expression = scope.expression
 
-    if not isinstance(expression, exp.Select) or dialect == "oracle":
+    if not isinstance(expression, exp.Select) or dialect.DISABLES_ALIAS_REF_EXPANSION:
         return
 
     alias_to_expression: t.Dict[str, t.Tuple[exp.Expression, int]] = {}
     projections = {s.alias_or_name for s in expression.selects}
-    is_bigquery = dialect == "bigquery"
     replaced = False
 
     def replace_columns(
@@ -346,12 +344,12 @@ def replace_columns(
                 # SELECT x.a, max(x.b) as x FROM x GROUP BY 1 HAVING x > 1;
                 # If "HAVING x" is expanded to "HAVING max(x.b)", BQ would blindly replace the "x" reference with the projection MAX(x.b)
                 # i.e HAVING MAX(MAX(x.b).b), resulting in the error: "Aggregations of aggregations are not allowed"
-                if is_having and is_bigquery:
+                if is_having and dialect.PROJECTION_ALIASES_SHADOW_SOURCE_NAMES:
                     skip_replace = skip_replace or any(
                         node.parts[0].name in projections
                         for node in alias_expr.find_all(exp.Column)
                     )
-            elif is_bigquery and (is_group_by or is_having):
+            elif dialect.PROJECTION_ALIASES_SHADOW_SOURCE_NAMES and (is_group_by or is_having):
                 column_table = table.name if table else column.table
                 if column_table in projections:
                     # BigQuery's GROUP BY and HAVING clauses get confused if the column name
@@ -404,9 +402,7 @@ def replace_columns(
     replace_columns(expression.args.get("having"), resolve_table=True)
     replace_columns(expression.args.get("qualify"), resolve_table=True)
 
-    # Snowflake allows alias expansion in the JOIN ... ON clause (and almost everywhere else)
-    # https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
-    if dialect == "snowflake":
+    if dialect.SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS:
         for join in expression.args.get("joins") or []:
             replace_columns(join)
 
@@ -476,7 +472,7 @@ def _expand_positional_references(
             else:
                 select = select.this
 
-                if dialect == "bigquery":
+                if Dialect.get_or_raise(dialect).PROJECTION_ALIASES_SHADOW_SOURCE_NAMES:
                     if ambiguous_projections is None:
                         # When a projection name is also a source name and it is referenced in the
                         # GROUP BY clause, BQ can't understand what the identifier corresponds to
@@ -598,7 +594,7 @@ def _qualify_columns(
             if column_table:
                 column.set("table", column_table)
             elif (
-                resolver.schema.dialect == "bigquery"
+                resolver.schema.dialect.TABLES_REFERENCEABLE_AS_COLUMNS
                 and len(column.parts) == 1
                 and column_name in scope.selected_sources
             ):
@@ -767,10 +763,9 @@ def _expand_stars(
             if not pivot_output_columns:
                 pivot_output_columns = [c.alias_or_name for c in pivot.expressions]
 
-    is_bigquery = dialect == "bigquery"
-    is_risingwave = dialect == "risingwave"
-
-    if (is_bigquery or is_risingwave) and any(isinstance(col, exp.Dot) for col in scope.stars):
+    if dialect.SUPPORTS_STRUCT_STAR_EXPANSION and any(
+        isinstance(col, exp.Dot) for col in scope.stars
+    ):
         # Found struct expansion, annotate scope ahead of time
         annotator.annotate_scope(scope)
 
@@ -787,12 +782,15 @@ def _expand_stars(
                 _add_except_columns(expression.this, tables, except_columns)
                 _add_replace_columns(expression.this, tables, replace_columns)
                 _add_rename_columns(expression.this, tables, rename_columns)
-            elif is_bigquery:
+            elif (
+                dialect.SUPPORTS_STRUCT_STAR_EXPANSION
+                and not dialect.REQUIRES_PARENTHESIZED_STRUCT_ACCESS
+            ):
                 struct_fields = _expand_struct_stars_bigquery(expression)
                 if struct_fields:
                     new_selections.extend(struct_fields)
                     continue
-            elif is_risingwave:
+            elif dialect.REQUIRES_PARENTHESIZED_STRUCT_ACCESS:
                 struct_fields = _expand_struct_stars_risingwave(expression)
                 if struct_fields:
                     new_selections.extend(struct_fields)
@@ -809,7 +807,7 @@ def _expand_stars(
             columns = resolver.get_source_columns(table, only_visible=True)
             columns = columns or scope.outer_columns
 
-            if pseudocolumns and is_bigquery:
+            if pseudocolumns and dialect.SUPPORTS_STRUCT_STAR_EXPANSION:
                 columns = [name for name in columns if name.upper() not in pseudocolumns]
 
             if not columns or "*" in columns:
@@ -1094,7 +1092,7 @@ def get_source_columns(self, name: str, only_visible: bool = False) -> t.Sequenc
                 # in bigquery, unnest structs are automatically scoped as tables, so you can
                 # directly select a struct field in a query.
                 # this handles the case where the unnest is statically defined.
-                if self.schema.dialect == "bigquery":
+                if self.schema.dialect.UNNEST_COLUMN_ONLY:
                     if source.expression.is_type(exp.DataType.Type.STRUCT):
                         for k in source.expression.type.expressions:  # type: ignore
                             columns.append(k.name)

diff --git a/sqlglot/optimizer/simplify.py b/sqlglot/optimizer/simplify.py
@@ -142,10 +142,8 @@ def simplify_parens(expression: exp.Expression, dialect: DialectType) -> exp.Exp
     if isinstance(parent, (exp.SubqueryPredicate, exp.Bracket)):
         return expression
 
-    # Handle risingwave struct columns
-    # see https://docs.risingwave.com/sql/data-types/struct#retrieve-data-in-a-struct
     if (
-        dialect == "risingwave"
+        Dialect.get_or_raise(dialect).REQUIRES_PARENTHESIZED_STRUCT_ACCESS
         and isinstance(parent, exp.Dot)
         and (isinstance(parent.right, (exp.Identifier, exp.Star)))
     ):
@@ -1193,10 +1191,7 @@ def simplify_coalesce(self, expression: exp.Expression) -> exp.Expression:
         ):
             return expression.this
 
-        # We can't convert `COALESCE(x, 1) = 2` into `NOT x IS NULL AND x = 2` for redshift,
-        # because they are not always equivalent. For example,  if `x` is `NULL` and it comes
-        # from a table, then the result is `NULL`, despite `FALSE AND NULL` evaluating to `FALSE`
-        if self.dialect == "redshift":
+        if self.dialect.COALESCE_COMPARISON_NON_STANDARD:
             return expression
 
         if not isinstance(expression, self.COMPARISONS):