feat(hive,spark)!: Support CHANGE COLUMN statements in Hive and CHANGE/ALTER COLUMN statements in Spark (#6004)

tsamaras · georgesittas · web-flow · commit 56c8b3bbff74 · 2025-10-08T15:45:03.000+03:00
* hive - parse ALTER TABLE CHANGE COLUMN syntax

* spark - parse alter table change column syntax

* hive - support cascade clause in change column statement

* cleanup cascade logic

* document constant

* fix formatting

* fixes for PR comments

 * updated syntax handling constant name
 * Inherit ALTER_PARSERS from parser.Parser
 * fix column name parsing
 * revised rename handling

* Update sqlglot/dialects/spark2.py

---------

Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com>
diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
@@ -525,6 +525,13 @@ class Dialect(metaclass=_Dialect):
     equivalent of CREATE SCHEMA is CREATE DATABASE.
     """
 
+    ALTER_TABLE_SUPPORTS_CASCADE = False
+    """
+    Hive by default does not update the schema of existing partitions when a column is changed.
+    The CASCADE clause is used to indicate that the change should be propagated to all existing partitions.
+    The Spark dialect, while derived from Hive, does not support the CASCADE clause.
+    """
+
     # Whether ADD is present for each column added by ALTER TABLE
     ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True
 
diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py
@@ -211,6 +211,7 @@ class Hive(Dialect):
     SAFE_DIVISION = True
     ARRAY_AGG_INCLUDES_NULLS = None
     REGEXP_EXTRACT_DEFAULT_GROUP = 1
+    ALTER_TABLE_SUPPORTS_CASCADE = True
 
     # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
     NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
@@ -310,6 +311,10 @@ class Parser(parser.Parser):
         VALUES_FOLLOWED_BY_PAREN = False
         JOINS_HAVE_EQUAL_PRECEDENCE = True
         ADD_JOIN_ON_TRUE = True
+        ALTER_TABLE_PARTITIONS = True
+
+        CHANGE_COLUMN_ALTER_SYNTAX = False
+        # CHANGE_COLUMN_ALTER_SYNTAX: whether CHANGE COLUMN also accepts the ALTER COLUMN-style form `<column> TYPE <data_type>`.
 
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
@@ -378,6 +383,11 @@ class Parser(parser.Parser):
             ),
         }
 
+        ALTER_PARSERS = {
+            **parser.Parser.ALTER_PARSERS,
+            "CHANGE": lambda self: self._parse_alter_table_change(),
+        }
+
         def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
             if not self._match(TokenType.L_PAREN, advance=False):
                 self._retreat(self._index - 1)
@@ -451,6 +461,35 @@ def _parse_types(
 
             return this
 
+        def _parse_alter_table_change(self) -> t.Optional[exp.Expression]:
+            """
+            Parse the action registered under "CHANGE" in ALTER_PARSERS into an exp.AlterColumn.
+
+            Accepted forms:
+                CHANGE [COLUMN] <column_name> <new_column_name> <data_type> [COMMENT '<text>']
+                CHANGE [COLUMN] <column_name> TYPE <data_type>  (only if CHANGE_COLUMN_ALTER_SYNTAX is set)
+
+            An error is raised through `self.raise_error` when, in the first form, the
+            column name, the new column name or the data type could not be parsed.
+            """
+            # The COLUMN keyword is optional, so the result of the match is ignored
+            self._match(TokenType.COLUMN)
+            this = self._parse_field(any_token=True)
+
+            # ALTER COLUMN-style form: only the type changes, so no rename_to is set
+            if self.CHANGE_COLUMN_ALTER_SYNTAX and self._match_text_seq("TYPE"):
+                return self.expression(
+                    exp.AlterColumn,
+                    this=this,
+                    dtype=self._parse_types(schema=True),
+                )
+
+            column_new = self._parse_field(any_token=True)
+            dtype = self._parse_types(schema=True)
+
+            # The COMMENT clause is optional; the string is only parsed if the COMMENT token matched
+            comment = self._match(TokenType.COMMENT) and self._parse_string()
+
+            # Old name, new name and data type are all mandatory in this form
+            if not this or not column_new or not dtype:
+                self.raise_error(
+                    "Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 'data_type'"
+                )
+
+            return self.expression(
+                exp.AlterColumn,
+                this=this,
+                rename_to=column_new,
+                dtype=dtype,
+                comment=comment,
+            )
+
         def _parse_partition_and_order(
             self,
         ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
@@ -500,6 +539,7 @@ class Generator(generator.Generator):
         PAD_FILL_PATTERN_IS_REQUIRED = True
         SUPPORTS_MEDIAN = False
         ARRAY_SIZE_NAME = "SIZE"
+        ALTER_SET_TYPE = ""
 
         EXPRESSIONS_WITHOUT_NESTED_CTES = {
             exp.Insert,
@@ -757,6 +797,32 @@ def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
                 ),
             )
 
+        def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
+            """
+            Generate a `CHANGE COLUMN <name> <new_name> <data_type> [COMMENT <comment>]` action.
+
+            When the expression has no `rename_to`, the current column name is repeated as
+            the new name. The `default`, `drop`, `visible` and `allow_null` args, as well as
+            a missing `dtype`, are reported through `self.unsupported`.
+            """
+            this = self.sql(expression, "this")
+            new_name = self.sql(expression, "rename_to") or this
+            dtype = self.sql(expression, "dtype")
+
+            # Generate the comment SQL once and reuse it for both the check and the output
+            comment = self.sql(expression, "comment")
+            comment = f" COMMENT {comment}" if comment else ""
+
+            default = self.sql(expression, "default")
+
+            # None of these AlterColumn args can be expressed with CHANGE COLUMN
+            unsupported_args = ("drop", "visible", "allow_null")
+            if default or any(expression.args.get(arg) for arg in unsupported_args):
+                self.unsupported("Unsupported CHANGE COLUMN syntax")
+
+            if not dtype:
+                self.unsupported("CHANGE COLUMN without a type is not supported")
+
+            return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}"
+
+        def renamecolumn_sql(self, expression: exp.RenameColumn) -> str:
+            """Report the column rename as unsupported and generate no SQL for it."""
+            message = "Cannot rename columns without data type defined in Hive"
+            self.unsupported(message)
+            return ""
+
         def alterset_sql(self, expression: exp.AlterSet) -> str:
             exprs = self.expressions(expression, flat=True)
             exprs = f" {exprs}" if exprs else ""
diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py
@@ -151,6 +151,8 @@ def _annotate_by_similar_args(
 
 
 class Spark2(Hive):
+    ALTER_TABLE_SUPPORTS_CASCADE = False
+
     ANNOTATORS = {
         **Hive.ANNOTATORS,
         exp.Substring: lambda self, e: self._annotate_by_args(e, "this"),
@@ -172,6 +174,7 @@ class Tokenizer(Hive.Tokenizer):
 
     class Parser(Hive.Parser):
         TRIM_PATTERN_FIRST = True
+        CHANGE_COLUMN_ALTER_SYNTAX = True
 
         FUNCTIONS = {
             **Hive.Parser.FUNCTIONS,
@@ -248,6 +251,7 @@ class Generator(Hive.Generator):
         QUERY_HINTS = True
         NVL2_SUPPORTED = True
         CAN_IMPLEMENT_ARRAY_ANY = True
+        ALTER_SET_TYPE = "TYPE"
 
         PROPERTIES_LOCATION = {
             **Hive.Generator.PROPERTIES_LOCATION,
@@ -364,3 +368,16 @@ def fileformatproperty_sql(self, expression: exp.FileFormatProperty) -> str:
                 return super().fileformatproperty_sql(expression)
 
             return f"USING {expression.name.upper()}"
+
+        def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
+            """
+            Generate the Spark action for an `exp.AlterColumn` node.
+
+            A rename (a `rename_to` that differs from the column name) becomes
+            `RENAME COLUMN <name> TO <new_name>`. Without a rename, a comment becomes
+            `ALTER COLUMN <name> COMMENT <comment>`. Everything else is delegated to the
+            generator that `Hive.Generator` itself extends.
+            """
+            this = self.sql(expression, "this")
+            new_name = self.sql(expression, "rename_to") or this
+            comment = self.sql(expression, "comment")
+
+            if new_name != this:
+                return f"RENAME COLUMN {this} TO {new_name}"
+
+            if comment:
+                return f"ALTER COLUMN {this} COMMENT {comment}"
+
+            # Bypass Hive.Generator's override, which emits Hive-only `CHANGE COLUMN ...` syntax
+            # (same bypass as renamecolumn_sql); the parent implementation is expected to emit
+            # the `ALTER COLUMN <name> TYPE <data_type>` form based on ALTER_SET_TYPE
+            return super(Hive.Generator, self).altercolumn_sql(expression)
+
+        def renamecolumn_sql(self, expression: exp.RenameColumn) -> str:
+            # Skip Hive.Generator's override, which reports column renames as unsupported,
+            # and use the implementation of the generator it extends instead
+            base_generator = super(Hive.Generator, self)
+            return base_generator.renamecolumn_sql(expression)
diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py
@@ -1838,6 +1838,7 @@ class AlterColumn(Expression):
         "comment": False,
         "allow_null": False,
         "visible": False,
+        "rename_to": False,
     }
 
 
@@ -4957,6 +4958,7 @@ class Alter(Expression):
         "cluster": False,
         "not_valid": False,
         "check": False,
+        "cascade": False,
     }
 
     @property
diff --git a/sqlglot/generator.py b/sqlglot/generator.py
@@ -3625,10 +3625,15 @@ def alter_sql(self, expression: exp.Alter) -> str:
         kind = self.sql(expression, "kind")
         not_valid = " NOT VALID" if expression.args.get("not_valid") else ""
         check = " WITH CHECK" if expression.args.get("check") else ""
+        cascade = (
+            " CASCADE"
+            if expression.args.get("cascade") and self.dialect.ALTER_TABLE_SUPPORTS_CASCADE
+            else ""
+        )
         this = self.sql(expression, "this")
         this = f" {this}" if this else ""
 
-        return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}"
+        return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}{cascade}"
 
     def altersession_sql(self, expression: exp.AlterSession) -> str:
         items_sql = self.expressions(expression, flat=True)
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
@@ -1531,6 +1531,9 @@ def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]
     # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
     ALTER_RENAME_REQUIRES_COLUMN = True
 
+    # Whether Alter statements are allowed to contain Partition specifications
+    ALTER_TABLE_PARTITIONS = False
+
     # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
     # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
     # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
@@ -7732,7 +7735,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
             check = None
             cluster = None
         else:
-            this = self._parse_table(schema=True)
+            this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
             check = self._match_text_seq("WITH", "CHECK")
             cluster = self._parse_on_property() if self._match(TokenType.ON) else None
 
@@ -7744,6 +7747,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
             actions = ensure_list(parser(self))
             not_valid = self._match_text_seq("NOT", "VALID")
             options = self._parse_csv(self._parse_property)
+            cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")
 
             if not self._curr and actions:
                 return self.expression(
@@ -7757,6 +7761,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
                     cluster=cluster,
                     not_valid=not_valid,
                     check=check,
+                    cascade=cascade,
                 )
 
         return self._parse_as_command(start)
diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py
@@ -176,6 +176,44 @@ def test_ddl(self):
             },
         )
 
+        self.validate_identity("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)")
+        self.validate_identity(
+            "ALTER TABLE x CHANGE a a VARCHAR(10)",
+            "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
+        )
+
+        self.validate_all(
+            "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
+            write={
+                "hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
+                "spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
+            },
+        )
+        self.validate_all(
+            "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
+            write={
+                "hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
+                "spark": "ALTER TABLE x ALTER COLUMN a COMMENT 'comment'",
+            },
+        )
+        self.validate_all(
+            "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
+            write={
+                "hive": "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
+                "spark": "ALTER TABLE x RENAME COLUMN a TO b",
+            },
+        )
+        self.validate_all(
+            "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
+            write={
+                "hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
+                "spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
+            },
+        )
+
+        self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING)")
+        self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE")
+
         self.validate_identity(
             """CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""",
         )
diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py
@@ -2,6 +2,7 @@
 
 from sqlglot import exp, parse_one
 from sqlglot.dialects.dialect import Dialects
+from sqlglot.errors import UnsupportedError
 from tests.dialects.test_dialect import Validator
 
 
@@ -132,6 +133,27 @@ def test_ddl(self):
                 "spark": "ALTER TABLE StudentInfo ADD COLUMNS (LastName STRING, DOB TIMESTAMP)",
             },
         )
+        self.validate_all(
+            "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
+            write={
+                "spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
+                "hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
+            },
+        )
+        self.validate_all(
+            "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
+            write={
+                "spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
+                "hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
+            },
+        )
+        self.validate_all(
+            "ALTER TABLE db.example RENAME COLUMN col_a TO col_b",
+            write={
+                "spark": "ALTER TABLE db.example RENAME COLUMN col_a TO col_b",
+                "hive": UnsupportedError,
+            },
+        )
         self.validate_all(
             "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)",
             write={

Original file line number	Diff line number	Diff line change
`@@ -1838,6 +1838,7 @@ class AlterColumn(Expression):`
`1838`	`1838`	`"comment": False,`
`1839`	`1839`	`"allow_null": False,`
`1840`	`1840`	`"visible": False,`
	`1841`	`+ "rename_to": False,`
`1841`	`1842`	`}`
`1842`	`1843`
`1843`	`1844`
`@@ -4957,6 +4958,7 @@ class Alter(Expression):`
`4957`	`4958`	`"cluster": False,`
`4958`	`4959`	`"not_valid": False,`
`4959`	`4960`	`"check": False,`
	`4961`	`+ "cascade": False,`
`4960`	`4962`	`}`
`4961`	`4963`
`4962`	`4964`	`@property`