Fix(trino): Don't quote identifiers in string literals for the partitioned_by property (#4998)

erindru · georgesittas · web-flow · commit adaef42234d8 · 2025-04-22T11:12:19.000+03:00
* Fix(trino): Don't quote identifiers in string literals for the partitioned_by property

* Simplify transformation

---------

Co-authored-by: George Sittas <giwrgos.sittas@gmail.com>
diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py
@@ -57,11 +57,13 @@ def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
 
 def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
     if isinstance(expression.parent, exp.PartitionedByProperty):
+        # Any columns in the ARRAY[] string literals should not be quoted
+        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)
+
         partition_exprs = [
             self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
             for c in expression.expressions
         ]
-
         return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))
 
     if expression.parent:
diff --git a/tests/dialects/test_athena.py b/tests/dialects/test_athena.py
@@ -276,15 +276,17 @@ def test_ctas(self):
                     exp.FileFormatProperty(this=exp.Literal.string("parquet")),
                     exp.LocationProperty(this=exp.Literal.string("s3://foo")),
                     exp.PartitionedByProperty(
-                        this=exp.Schema(expressions=[exp.to_column("partition_col")])
+                        this=exp.Schema(expressions=[exp.to_column("partition_col", quoted=True)])
                     ),
                 ]
             ),
             expression=exp.select("1"),
         )
+
+        # Even if identify=True, the column names should not be quoted within the string literals in the partitioned_by ARRAY[]
         self.assertEqual(
             ctas_hive.sql(dialect=self.dialect, identify=True),
-            "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['\"partition_col\"']) AS SELECT 1",
+            "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
         )
         self.assertEqual(
             ctas_hive.sql(dialect=self.dialect, identify=False),
@@ -303,7 +305,8 @@ def test_ctas(self):
                             expressions=[
                                 exp.to_column("partition_col"),
                                 exp.PartitionedByBucket(
-                                    this=exp.to_column("a"), expression=exp.Literal.number(4)
+                                    this=exp.to_column("a", quoted=True),
+                                    expression=exp.Literal.number(4),
                                 ),
                             ]
                         )
@@ -312,9 +315,12 @@ def test_ctas(self):
             ),
             expression=exp.select("1"),
         )
+        # Even if identify=True, the column names should not be quoted within the string literals in the partitioning ARRAY[]
+        # Technically Trino's Iceberg connector does support quoted column names in the string literals, but it's undocumented,
+        # so we don't do it, to keep consistency with the Hive connector
         self.assertEqual(
             ctas_iceberg.sql(dialect=self.dialect, identify=True),
-            "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['\"partition_col\"', 'BUCKET(\"a\", 4)']) AS SELECT 1",
+            "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
         )
         self.assertEqual(
             ctas_iceberg.sql(dialect=self.dialect, identify=False),
diff --git a/tests/dialects/test_trino.py b/tests/dialects/test_trino.py
@@ -97,11 +97,19 @@ def test_ddl(self):
         self.validate_identity(
             "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])"
         )
+        self.validate_identity(
+            'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONED_BY=ARRAY[\'a\', \'b\'])',
+            identify=True,
+        )
 
         # Iceberg connector syntax (partitioning, can contain Iceberg transform expressions)
         self.validate_identity(
             "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])",
         )
+        self.validate_identity(
+            'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONING=ARRAY[\'a\', \'bucket(4, b)\', \'month(c)\'])',
+            identify=True,
+        )
 
     def test_analyze(self):
         self.validate_identity("ANALYZE tbl")