Skip to content

Commit adaef42

Browse files
Fix(trino): Don't quote identifiers in string literals for the partitioned_by property (#4998)
* Fix(trino): Don't quote identifiers in string literals for the partitioned_by property * Simplify transformation --------- Co-authored-by: George Sittas <[email protected]>
1 parent 6572517 commit adaef42

File tree

3 files changed

+21
-5
lines changed

3 files changed

+21
-5
lines changed

sqlglot/dialects/presto.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,13 @@ def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
5757

5858
def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
5959
if isinstance(expression.parent, exp.PartitionedByProperty):
60+
# Any columns in the ARRAY[] string literals should not be quoted
61+
expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)
62+
6063
partition_exprs = [
6164
self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
6265
for c in expression.expressions
6366
]
64-
6567
return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))
6668

6769
if expression.parent:

tests/dialects/test_athena.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -276,15 +276,17 @@ def test_ctas(self):
276276
exp.FileFormatProperty(this=exp.Literal.string("parquet")),
277277
exp.LocationProperty(this=exp.Literal.string("s3://foo")),
278278
exp.PartitionedByProperty(
279-
this=exp.Schema(expressions=[exp.to_column("partition_col")])
279+
this=exp.Schema(expressions=[exp.to_column("partition_col", quoted=True)])
280280
),
281281
]
282282
),
283283
expression=exp.select("1"),
284284
)
285+
286+
# Even if identify=True, the column names should not be quoted within the string literals in the partitioned_by ARRAY[]
285287
self.assertEqual(
286288
ctas_hive.sql(dialect=self.dialect, identify=True),
287-
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['\"partition_col\"']) AS SELECT 1",
289+
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
288290
)
289291
self.assertEqual(
290292
ctas_hive.sql(dialect=self.dialect, identify=False),
@@ -303,7 +305,8 @@ def test_ctas(self):
303305
expressions=[
304306
exp.to_column("partition_col"),
305307
exp.PartitionedByBucket(
306-
this=exp.to_column("a"), expression=exp.Literal.number(4)
308+
this=exp.to_column("a", quoted=True),
309+
expression=exp.Literal.number(4),
307310
),
308311
]
309312
)
@@ -312,9 +315,12 @@ def test_ctas(self):
312315
),
313316
expression=exp.select("1"),
314317
)
318+
# Even if identify=True, the column names should not be quoted within the string literals in the partitioning ARRAY[]
319+
# Technically Trino's Iceberg connector does support quoted column names in the string literals, but it's undocumented
320+
# so we don't do it, to keep consistency with the Hive connector
315321
self.assertEqual(
316322
ctas_iceberg.sql(dialect=self.dialect, identify=True),
317-
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['\"partition_col\"', 'BUCKET(\"a\", 4)']) AS SELECT 1",
323+
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
318324
)
319325
self.assertEqual(
320326
ctas_iceberg.sql(dialect=self.dialect, identify=False),

tests/dialects/test_trino.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,19 @@ def test_ddl(self):
9797
self.validate_identity(
9898
"CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])"
9999
)
100+
self.validate_identity(
101+
'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONED_BY=ARRAY[\'a\', \'b\'])',
102+
identify=True,
103+
)
100104

101105
# Iceberg connector syntax (partitioning, can contain Iceberg transform expressions)
102106
self.validate_identity(
103107
"CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])",
104108
)
109+
self.validate_identity(
110+
'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONING=ARRAY[\'a\', \'bucket(4, b)\', \'month(c)\'])',
111+
identify=True,
112+
)
105113

106114
def test_analyze(self):
107115
self.validate_identity("ANALYZE tbl")

0 commit comments

Comments
 (0)