Skip to content

Commit 2f12bd9

Browse files
authored
Fix(athena): Generate PartitionedByProperty correctly on CTAS for an Iceberg table (#4654)
1 parent 8b0b8ac commit 2f12bd9

File tree

2 files changed

+37
-13
lines changed

2 files changed

+37
-13
lines changed

sqlglot/dialects/athena.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@ def _generate_as_hive(expression: exp.Expression) -> bool:
3232
return False
3333

3434

35+
def _is_iceberg_table(properties: exp.Properties) -> bool:
36+
table_type_property = next(
37+
(
38+
p
39+
for p in properties.expressions
40+
if isinstance(p, exp.Property) and p.name == "table_type"
41+
),
42+
None,
43+
)
44+
return bool(table_type_property and table_type_property.text("value").lower() == "iceberg")
45+
46+
3547
def _location_property_sql(self: Athena.Generator, e: exp.LocationProperty):
3648
# If table_type='iceberg', the LocationProperty is called 'location'
3749
# Otherwise, it's called 'external_location'
@@ -40,20 +52,25 @@ def _location_property_sql(self: Athena.Generator, e: exp.LocationProperty):
4052
prop_name = "external_location"
4153

4254
if isinstance(e.parent, exp.Properties):
43-
table_type_property = next(
44-
(
45-
p
46-
for p in e.parent.expressions
47-
if isinstance(p, exp.Property) and p.name == "table_type"
48-
),
49-
None,
50-
)
51-
if table_type_property and table_type_property.text("value").lower() == "iceberg":
55+
if _is_iceberg_table(e.parent):
5256
prop_name = "location"
5357

5458
return f"{prop_name}={self.sql(e, 'this')}"
5559

5660

61+
def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByProperty):
62+
# If table_type='iceberg' then the table property for partitioning is called 'partitioning'
63+
# If table_type='hive' it's called 'partitioned_by'
64+
# ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
65+
66+
prop_name = "partitioned_by"
67+
if isinstance(e.parent, exp.Properties):
68+
if _is_iceberg_table(e.parent):
69+
prop_name = "partitioning"
70+
71+
return f"{prop_name}={self.sql(e, 'this')}"
72+
73+
5774
class Athena(Trino):
5875
"""
5976
Over the years, it looks like AWS has taken various execution engines, bolted on AWS-specific modifications and then
@@ -132,7 +149,7 @@ class Generator(Trino.Generator):
132149
TRANSFORMS = {
133150
**Trino.Generator.TRANSFORMS,
134151
exp.FileFormatProperty: lambda self, e: f"format={self.sql(e, 'this')}",
135-
exp.PartitionedByProperty: lambda self, e: f"partitioned_by={self.sql(e, 'this')}",
152+
exp.PartitionedByProperty: _partitioned_by_property_sql,
136153
exp.LocationProperty: _location_property_sql,
137154
}
138155

tests/dialects/test_athena.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,8 @@ def test_dml_quoting(self):
198198

199199
def test_ctas(self):
200200
# Hive tables use 'external_location' to specify the table location, Iceberg tables use 'location' to specify the table location
201-
# The 'table_type' property is used to determine if it's a Hive or an Iceberg table
201+
# In addition, Hive tables used 'partitioned_by' to specify the partition fields and Iceberg tables use 'partitioning' to specify the partition fields
202+
# The 'table_type' property is used to determine if it's a Hive or an Iceberg table. If it's omitted, it defaults to Hive
202203
# ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
203204
ctas_hive = exp.Create(
204205
this=exp.to_table("foo.bar"),
@@ -207,13 +208,16 @@ def test_ctas(self):
207208
expressions=[
208209
exp.FileFormatProperty(this=exp.Literal.string("parquet")),
209210
exp.LocationProperty(this=exp.Literal.string("s3://foo")),
211+
exp.PartitionedByProperty(
212+
this=exp.Schema(expressions=[exp.to_column("partition_col")])
213+
),
210214
]
211215
),
212216
expression=exp.select("1"),
213217
)
214218
self.assertEqual(
215219
ctas_hive.sql(dialect=self.dialect, identify=True),
216-
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo') AS SELECT 1",
220+
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
217221
)
218222

219223
ctas_iceberg = exp.Create(
@@ -223,11 +227,14 @@ def test_ctas(self):
223227
expressions=[
224228
exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
225229
exp.LocationProperty(this=exp.Literal.string("s3://foo")),
230+
exp.PartitionedByProperty(
231+
this=exp.Schema(expressions=[exp.to_column("partition_col")])
232+
),
226233
]
227234
),
228235
expression=exp.select("1"),
229236
)
230237
self.assertEqual(
231238
ctas_iceberg.sql(dialect=self.dialect, identify=True),
232-
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo') AS SELECT 1",
239+
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col']) AS SELECT 1",
233240
)

0 commit comments

Comments
 (0)