Skip to content

Commit 56c8b3b

Browse files
feat(hive,spark)!: Support CHANGE COLUMN statements in Hive and CHANGE/ALTER COLUMN statements in Spark (#6004)
* hive - parse ALTER table CHANGE COLUMN syntax * spark - parse alter table change column syntax * hive - support cascade clause in change column statement * cleanup cascade logic * document constant * fix formatting * fixes for PR comments * updated syntax handling constant name * Inherit ALTER_PARSERS from parser.Parser * fix column name parsing * revised rename handling * Update sqlglot/dialects/spark2.py --------- Co-authored-by: Jo <[email protected]>
1 parent 15b3fac commit 56c8b3b

File tree

8 files changed

+164
-2
lines changed

8 files changed

+164
-2
lines changed

sqlglot/dialects/dialect.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,13 @@ class Dialect(metaclass=_Dialect):
525525
equivalent of CREATE SCHEMA is CREATE DATABASE.
526526
"""
527527

528+
ALTER_TABLE_SUPPORTS_CASCADE = False
529+
"""
530+
Hive by default does not update the schema of existing partitions when a column is changed.
531+
The CASCADE clause is used to indicate that the change should be propagated to all existing partitions.
532+
The Spark dialect, while derived from Hive, does not support the CASCADE clause.
533+
"""
534+
528535
# Whether ADD is present for each column added by ALTER TABLE
529536
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True
530537

sqlglot/dialects/hive.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ class Hive(Dialect):
211211
SAFE_DIVISION = True
212212
ARRAY_AGG_INCLUDES_NULLS = None
213213
REGEXP_EXTRACT_DEFAULT_GROUP = 1
214+
ALTER_TABLE_SUPPORTS_CASCADE = True
214215

215216
# https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
216217
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
@@ -310,6 +311,10 @@ class Parser(parser.Parser):
310311
VALUES_FOLLOWED_BY_PAREN = False
311312
JOINS_HAVE_EQUAL_PRECEDENCE = True
312313
ADD_JOIN_ON_TRUE = True
314+
ALTER_TABLE_PARTITIONS = True
315+
316+
CHANGE_COLUMN_ALTER_SYNTAX = False
317+
# Whether CHANGE COLUMN also accepts the ALTER COLUMN-style form `CHANGE [COLUMN] <name> TYPE <data_type>`.
313318

314319
FUNCTIONS = {
315320
**parser.Parser.FUNCTIONS,
@@ -378,6 +383,11 @@ class Parser(parser.Parser):
378383
),
379384
}
380385

386+
ALTER_PARSERS = {
387+
**parser.Parser.ALTER_PARSERS,
388+
"CHANGE": lambda self: self._parse_alter_table_change(),
389+
}
390+
381391
def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
382392
if not self._match(TokenType.L_PAREN, advance=False):
383393
self._retreat(self._index - 1)
@@ -451,6 +461,35 @@ def _parse_types(
451461

452462
return this
453463

464+
def _parse_alter_table_change(self) -> t.Optional[exp.Expression]:
    """Parse the action that follows CHANGE in an ALTER TABLE statement.

    Two shapes are recognised:

    * ``CHANGE [COLUMN] <name> TYPE <data_type>`` -- only when
      ``CHANGE_COLUMN_ALTER_SYNTAX`` is enabled on the parser.
    * ``CHANGE [COLUMN] <old_name> <new_name> <data_type> [COMMENT '<text>']``

    Returns:
        An ``exp.AlterColumn`` node. For the second shape the new name is
        stored under ``rename_to`` (it may equal the old name).

    Raises:
        Reports a parse error through ``self.raise_error`` when the second
        shape is missing the old name, the new name or the data type.
    """
    # The COLUMN keyword is optional, so the match result is deliberately ignored.
    self._match(TokenType.COLUMN)
    old_name = self._parse_field(any_token=True)

    if self.CHANGE_COLUMN_ALTER_SYNTAX and self._match_text_seq("TYPE"):
        altered_type = self._parse_types(schema=True)
        return self.expression(exp.AlterColumn, this=old_name, dtype=altered_type)

    new_name = self._parse_field(any_token=True)
    new_type = self._parse_types(schema=True)

    # Left falsy when no COMMENT keyword follows the data type.
    comment = self._match(TokenType.COMMENT) and self._parse_string()

    # Validation runs only after every token has been consumed, as in the original flow.
    if not (old_name and new_name and new_type):
        self.raise_error(
            "Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 'data_type'"
        )

    return self.expression(
        exp.AlterColumn,
        this=old_name,
        rename_to=new_name,
        dtype=new_type,
        comment=comment,
    )
492+
454493
def _parse_partition_and_order(
455494
self,
456495
) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
@@ -500,6 +539,7 @@ class Generator(generator.Generator):
500539
PAD_FILL_PATTERN_IS_REQUIRED = True
501540
SUPPORTS_MEDIAN = False
502541
ARRAY_SIZE_NAME = "SIZE"
542+
ALTER_SET_TYPE = ""
503543

504544
EXPRESSIONS_WITHOUT_NESTED_CTES = {
505545
exp.Insert,
@@ -757,6 +797,32 @@ def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
757797
),
758798
)
759799

800+
def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
    """Render an ``AlterColumn`` action using Hive's CHANGE COLUMN syntax.

    The output has the form
    ``CHANGE COLUMN <old_name> <new_name> <data_type>[ COMMENT <comment>]``.

    Args:
        expression: The column alteration to render. ``rename_to`` is
            optional; when absent the current name is repeated.

    Returns:
        The CHANGE COLUMN action, without the leading ``ALTER TABLE <table>``.

    Unsupported:
        Calls ``self.unsupported`` when ``default``, ``drop``, ``visible`` or
        ``allow_null`` is set, and when no data type is present.
    """
    this = self.sql(expression, "this")
    # Both names are always written out, so an unchanged name is simply repeated.
    new_name = self.sql(expression, "rename_to") or this
    dtype = self.sql(expression, "dtype")

    # Render the comment once and reuse it for both the check and the output.
    comment = self.sql(expression, "comment")
    comment = f" COMMENT {comment}" if comment else ""

    default = self.sql(expression, "default")
    visible = expression.args.get("visible")
    allow_null = expression.args.get("allow_null")
    drop = expression.args.get("drop")

    # Each argument is checked exactly once (the original listed `drop` twice).
    if any((default, drop, visible, allow_null)):
        self.unsupported("Unsupported CHANGE COLUMN syntax")

    if not dtype:
        self.unsupported("CHANGE COLUMN without a type is not supported")

    return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}"
821+
822+
def renamecolumn_sql(self, expression: exp.RenameColumn) -> str:
    """Reject a standalone RENAME COLUMN action for this dialect.

    The expression is not rendered: the method reports the construct as
    unsupported and returns an empty string.
    """
    reason = "Cannot rename columns without data type defined in Hive"
    self.unsupported(reason)
    return ""
825+
760826
def alterset_sql(self, expression: exp.AlterSet) -> str:
761827
exprs = self.expressions(expression, flat=True)
762828
exprs = f" {exprs}" if exprs else ""

sqlglot/dialects/spark2.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ def _annotate_by_similar_args(
151151

152152

153153
class Spark2(Hive):
154+
ALTER_TABLE_SUPPORTS_CASCADE = False
155+
154156
ANNOTATORS = {
155157
**Hive.ANNOTATORS,
156158
exp.Substring: lambda self, e: self._annotate_by_args(e, "this"),
@@ -172,6 +174,7 @@ class Tokenizer(Hive.Tokenizer):
172174

173175
class Parser(Hive.Parser):
174176
TRIM_PATTERN_FIRST = True
177+
CHANGE_COLUMN_ALTER_SYNTAX = True
175178

176179
FUNCTIONS = {
177180
**Hive.Parser.FUNCTIONS,
@@ -248,6 +251,7 @@ class Generator(Hive.Generator):
248251
QUERY_HINTS = True
249252
NVL2_SUPPORTED = True
250253
CAN_IMPLEMENT_ARRAY_ANY = True
254+
ALTER_SET_TYPE = "TYPE"
251255

252256
PROPERTIES_LOCATION = {
253257
**Hive.Generator.PROPERTIES_LOCATION,
@@ -364,3 +368,16 @@ def fileformatproperty_sql(self, expression: exp.FileFormatProperty) -> str:
364368
return super().fileformatproperty_sql(expression)
365369

366370
return f"USING {expression.name.upper()}"
371+
372+
def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
    """Render an ``AlterColumn`` action for Spark.

    Exactly one of three forms is emitted:

    * ``RENAME COLUMN <old> TO <new>`` when ``rename_to`` differs from the
      current name. Any data type or comment on the expression is not
      rendered in this case.
    * ``ALTER COLUMN <name> COMMENT <comment>`` when the name is unchanged
      and a comment is present. Any data type is not rendered in this case.
    * Otherwise, whatever the generator implementation above
      ``Hive.Generator`` produces.
    """
    this = self.sql(expression, "this")
    new_name = self.sql(expression, "rename_to") or this
    comment = self.sql(expression, "comment")

    if new_name != this:
        return f"RENAME COLUMN {this} TO {new_name}"

    if comment:
        return f"ALTER COLUMN {this} COMMENT {comment}"

    # Skip Hive's CHANGE COLUMN override: a plain `super()` would resolve to
    # Hive.Generator.altercolumn_sql and emit `CHANGE COLUMN ...`, while the
    # dialect tests expect Spark to emit `ALTER COLUMN <name> TYPE <dtype>`.
    return super(Hive.Generator, self).altercolumn_sql(expression)
381+
382+
def renamecolumn_sql(self, expression: exp.RenameColumn) -> str:
    """Render RENAME COLUMN by bypassing ``Hive.Generator``'s override.

    Dispatch starts after ``Hive.Generator`` in the MRO, so the Hive
    implementation of this method is not the one invoked.
    """
    generator_above_hive = super(Hive.Generator, self)
    return generator_above_hive.renamecolumn_sql(expression)

sqlglot/expressions.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1838,6 +1838,7 @@ class AlterColumn(Expression):
18381838
"comment": False,
18391839
"allow_null": False,
18401840
"visible": False,
1841+
"rename_to": False,
18411842
}
18421843

18431844

@@ -4957,6 +4958,7 @@ class Alter(Expression):
49574958
"cluster": False,
49584959
"not_valid": False,
49594960
"check": False,
4961+
"cascade": False,
49604962
}
49614963

49624964
@property

sqlglot/generator.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3625,10 +3625,15 @@ def alter_sql(self, expression: exp.Alter) -> str:
36253625
kind = self.sql(expression, "kind")
36263626
not_valid = " NOT VALID" if expression.args.get("not_valid") else ""
36273627
check = " WITH CHECK" if expression.args.get("check") else ""
3628+
cascade = (
3629+
" CASCADE"
3630+
if expression.args.get("cascade") and self.dialect.ALTER_TABLE_SUPPORTS_CASCADE
3631+
else ""
3632+
)
36283633
this = self.sql(expression, "this")
36293634
this = f" {this}" if this else ""
36303635

3631-
return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}"
3636+
return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}{cascade}"
36323637

36333638
def altersession_sql(self, expression: exp.AlterSession) -> str:
36343639
items_sql = self.expressions(expression, flat=True)

sqlglot/parser.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1531,6 +1531,9 @@ def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]
15311531
# Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
15321532
ALTER_RENAME_REQUIRES_COLUMN = True
15331533

1534+
# Whether Alter statements are allowed to contain Partition specifications
1535+
ALTER_TABLE_PARTITIONS = False
1536+
15341537
# Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
15351538
# In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
15361539
# to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
@@ -7732,7 +7735,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
77327735
check = None
77337736
cluster = None
77347737
else:
7735-
this = self._parse_table(schema=True)
7738+
this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
77367739
check = self._match_text_seq("WITH", "CHECK")
77377740
cluster = self._parse_on_property() if self._match(TokenType.ON) else None
77387741

@@ -7744,6 +7747,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
77447747
actions = ensure_list(parser(self))
77457748
not_valid = self._match_text_seq("NOT", "VALID")
77467749
options = self._parse_csv(self._parse_property)
7750+
cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")
77477751

77487752
if not self._curr and actions:
77497753
return self.expression(
@@ -7757,6 +7761,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
77577761
cluster=cluster,
77587762
not_valid=not_valid,
77597763
check=check,
7764+
cascade=cascade,
77607765
)
77617766

77627767
return self._parse_as_command(start)

tests/dialects/test_hive.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,44 @@ def test_ddl(self):
176176
},
177177
)
178178

179+
self.validate_identity("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)")
180+
self.validate_identity(
181+
"ALTER TABLE x CHANGE a a VARCHAR(10)",
182+
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
183+
)
184+
185+
self.validate_all(
186+
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
187+
write={
188+
"hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
189+
"spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
190+
},
191+
)
192+
self.validate_all(
193+
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
194+
write={
195+
"hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
196+
"spark": "ALTER TABLE x ALTER COLUMN a COMMENT 'comment'",
197+
},
198+
)
199+
self.validate_all(
200+
"ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
201+
write={
202+
"hive": "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
203+
"spark": "ALTER TABLE x RENAME COLUMN a TO b",
204+
},
205+
)
206+
self.validate_all(
207+
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
208+
write={
209+
"hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
210+
"spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
211+
},
212+
)
213+
214+
self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING)")
215+
self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE")
216+
179217
self.validate_identity(
180218
"""CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""",
181219
)

tests/dialects/test_spark.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from sqlglot import exp, parse_one
44
from sqlglot.dialects.dialect import Dialects
5+
from sqlglot.errors import UnsupportedError
56
from tests.dialects.test_dialect import Validator
67

78

@@ -132,6 +133,27 @@ def test_ddl(self):
132133
"spark": "ALTER TABLE StudentInfo ADD COLUMNS (LastName STRING, DOB TIMESTAMP)",
133134
},
134135
)
136+
self.validate_all(
137+
"ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
138+
write={
139+
"spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
140+
"hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
141+
},
142+
)
143+
self.validate_all(
144+
"ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
145+
write={
146+
"spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
147+
"hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
148+
},
149+
)
150+
self.validate_all(
151+
"ALTER TABLE db.example RENAME COLUMN col_a TO col_b",
152+
write={
153+
"spark": "ALTER TABLE db.example RENAME COLUMN col_a TO col_b",
154+
"hive": UnsupportedError,
155+
},
156+
)
135157
self.validate_all(
136158
"ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)",
137159
write={

0 commit comments

Comments
 (0)