address comments, fix tests, add tests, fix logic

helioshe4 · helioshe4 · commit f5e7cec1333c · 2026-02-19T07:33:51.000Z
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
@@ -4286,7 +4286,7 @@
       },
       "MISMATCH_WITH_DISTINCT_INPUT_UNSAFE_CAST" : {
         "message" : [
-          "The function <funcName> with DISTINCT requires a cast from <inputType> to <castType>, but this cast may not preserve equality semantics for the input type (e.g., floating-point -0.0 and 0.0 are treated as equal during GROUP BY but cast to different strings, leading to incorrect deduplication)."
+          "The function <funcName> with DISTINCT and WITHIN GROUP (ORDER BY) is not supported for <inputType> input. Explicitly cast the input to <castType> in both the function argument and the ORDER BY expression."
         ]
       },
       "WITHIN_GROUP_MISSING" : {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AggregateExpressionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AggregateExpressionResolver.scala
@@ -115,25 +115,25 @@ class AggregateExpressionResolver(
     }
   }
 
-  private def validateResolvedAggregateExpression(aggregateExpression: AggregateExpression): Unit =
+  private def validateResolvedAggregateExpression(
+      aggregateExpression: AggregateExpression): Unit = {
     aggregateExpression match {
       case agg @ AggregateExpression(listAgg: ListAgg, _, _, _, _)
           if agg.isDistinct => listAgg.validateDistinctOrderCompatibility()
       case _ =>
-        if (expressionResolutionContextStack.peek().hasAggregateExpressions) {
-          throwNestedAggregateFunction(aggregateExpression)
-        }
-
-        aggregateExpression.aggregateFunction.children.foreach { child =>
-          if (!child.deterministic) {
-            throwAggregateFunctionWithNondeterministicExpression(
-              aggregateExpression,
-              child
-            )
-          }
-        }
     }
 
+    if (expressionResolutionContextStack.peek().hasAggregateExpressions) {
+      throwNestedAggregateFunction(aggregateExpression)
+    }
+
+    aggregateExpression.aggregateFunction.children.foreach { child =>
+      if (!child.deterministic) {
+        throwAggregateFunctionWithNondeterministicExpression(aggregateExpression, child)
+      }
+    }
+  }
+
   /**
    * If the [[AggregateExpression]] has outer references in its subtree, we need to handle it in a
    * special way. The whole process is explained in the [[SubqueryScope]] scaladoc, but in short
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
@@ -632,7 +632,7 @@ case class ListAgg(
   }
 
   /**
-   * Returns true if casting `dt` to string is injective for DISTINCT deduplication.
+   * Returns true if casting `dt` to string/binary is injective for DISTINCT deduplication.
    *
    * @see [[validateDistinctOrderCompatibility]]
    */
@@ -646,7 +646,7 @@ case class ListAgg(
     case _: DayTimeIntervalType => true
     case BooleanType => true
     case BinaryType => true
-    case st: StringType if st.isUTF8BinaryCollation => true
+    case st: StringType => st.isUTF8BinaryCollation
     case _: DoubleType | FloatType => false
     // During DST fall-back, two distinct UTC epochs can format to the same local time string
     // because the default format omits the timezone offset. TimestampNTZType is safe (uses UTC).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -6893,8 +6893,9 @@ object SQLConf {
     buildConf("spark.sql.listagg.allowDistinctCastWithOrder.enabled")
       .internal()
       .doc("When true, LISTAGG(DISTINCT expr) WITHIN GROUP (ORDER BY expr) is allowed on " +
-        "non-string expr whose cast to string is injective. When false, DISTINCT requires " +
-        "expr and ORDER BY to reference the same expression with no cast.")
+        "non-string expr when the implicit cast to string preserves equality (e.g., integer, " +
+        "decimal, date). When false, the function argument and ORDER BY expression must have " +
+        "the exact same type, which requires explicit casts.")
       .version("4.2.0")
       .booleanConf
       .createWithDefault(true)
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/listagg-collations.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/listagg-collations.sql.out
@@ -120,6 +120,37 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT listagg(DISTINCT CAST(col AS STRING)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col)
+-- !query analysis
+Aggregate [listagg(distinct cast(col#x as string), null, col#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT CAST(col AS STRING), NULL) WITHIN GROUP (ORDER BY col ASC NULLS FIRST)#x]
++- SubqueryAlias t
+   +- LocalRelation [col#x]
+
+
+-- !query
+SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT_UNSAFE_CAST",
+  "sqlState" : "42K0K",
+  "messageParameters" : {
+    "castType" : "\"STRING COLLATE UTF8_LCASE\"",
+    "funcName" : "`listagg`",
+    "inputType" : "\"STRING\""
+  }
+}
+
+
+-- !query
+SELECT listagg(DISTINCT CAST(col AS STRING)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col)
+-- !query analysis
+Aggregate [listagg(distinct cast(col#x as string), null, col#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT CAST(col AS STRING), NULL) WITHIN GROUP (ORDER BY col ASC NULLS FIRST)#x]
++- SubqueryAlias t
+   +- LocalRelation [col#x]
+
+
 -- !query
 SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col)
 -- !query analysis
@@ -133,3 +164,26 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputType" : "\"BINARY\""
   }
 }
+
+
+-- !query
+SELECT listagg(DISTINCT CAST(col AS BINARY)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col)
+-- !query analysis
+Aggregate [listagg(distinct cast(col#x as binary), null, col#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT CAST(col AS BINARY), NULL) WITHIN GROUP (ORDER BY col ASC NULLS FIRST)#x]
++- SubqueryAlias t
+   +- LocalRelation [col#x]
+
+
+-- !query
+SELECT listagg(DISTINCT CAST(col AS BINARY)) WITHIN GROUP (ORDER BY col) FROM (SELECT col COLLATE UTF8_LCASE AS col FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col))
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT_UNSAFE_CAST",
+  "sqlState" : "42K0K",
+  "messageParameters" : {
+    "castType" : "\"BINARY\"",
+    "funcName" : "`listagg`",
+    "inputType" : "\"STRING COLLATE UTF8_LCASE\""
+  }
+}
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/listagg.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/listagg.sql.out
@@ -272,7 +272,7 @@ Aggregate [listagg(col1#x, null, col2#x DESC NULLS LAST, col1#x DESC NULLS LAST,
 
 
 -- !query
-WITH t(col) AS (SELECT listagg(col1) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t
+WITH t(col) AS (SELECT listagg(col1) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t
 -- !query analysis
 WithCTE
 :- CTERelationDef xxxx, false
@@ -281,13 +281,13 @@ WithCTE
 :        +- Aggregate [listagg(col1#x, null, 0, 0) AS listagg(col1, NULL)#x]
 :           +- SubqueryAlias __auto_generated_subquery_name
 :              +- LocalRelation [col1#x]
-+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0xDEAD as string)) AS regexp_count(col, X'DEAD')#x, regexp_count(cast(col#x as string), cast(0xBEEF as string)) AS regexp_count(col, X'BEEF')#x]
++- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0xDEAD)) AS regexp_count(hex(col), hex(X'DEAD'))#x, regexp_count(hex(col#x), hex(0xBEEF)) AS regexp_count(hex(col), hex(X'BEEF'))#x]
    +- SubqueryAlias t
       +- CTERelationRef xxxx, true, [col#x], false, false, 1
 
 
 -- !query
-WITH t(col) AS (SELECT listagg(col1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t
+WITH t(col) AS (SELECT listagg(col1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t
 -- !query analysis
 WithCTE
 :- CTERelationDef xxxx, false
@@ -296,13 +296,13 @@ WithCTE
 :        +- Aggregate [listagg(col1#x, null, 0, 0) AS listagg(col1, NULL)#x]
 :           +- SubqueryAlias __auto_generated_subquery_name
 :              +- LocalRelation [col1#x]
-+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0xDEAD as string)) AS regexp_count(col, X'DEAD')#x, regexp_count(cast(col#x as string), cast(0xBEEF as string)) AS regexp_count(col, X'BEEF')#x]
++- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0xDEAD)) AS regexp_count(hex(col), hex(X'DEAD'))#x, regexp_count(hex(col#x), hex(0xBEEF)) AS regexp_count(hex(col), hex(X'BEEF'))#x]
    +- SubqueryAlias t
       +- CTERelationRef xxxx, true, [col#x], false, false, 1
 
 
 -- !query
-WITH t(col) AS (SELECT listagg(col1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'42'), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t
+WITH t(col) AS (SELECT listagg(col1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'42')), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t
 -- !query analysis
 WithCTE
 :- CTERelationDef xxxx, false
@@ -311,7 +311,7 @@ WithCTE
 :        +- Aggregate [listagg(col1#x, 0x42, 0, 0) AS listagg(col1, X'42')#x]
 :           +- SubqueryAlias __auto_generated_subquery_name
 :              +- LocalRelation [col1#x]
-+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0x42 as string)) AS regexp_count(col, X'42')#x, regexp_count(cast(col#x as string), cast(0xDEAD as string)) AS regexp_count(col, X'DEAD')#x, regexp_count(cast(col#x as string), cast(0xBEEF as string)) AS regexp_count(col, X'BEEF')#x]
++- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0x42)) AS regexp_count(hex(col), hex(X'42'))#x, regexp_count(hex(col#x), hex(0xDEAD)) AS regexp_count(hex(col), hex(X'DEAD'))#x, regexp_count(hex(col#x), hex(0xBEEF)) AS regexp_count(hex(col), hex(X'BEEF'))#x]
    +- SubqueryAlias t
       +- CTERelationRef xxxx, true, [col#x], false, false, 1
 
@@ -448,7 +448,7 @@ Aggregate [grp#x], [grp#x, listagg(distinct col#x, null, col#x ASC NULLS FIRST,
 
 
 -- !query
-WITH t(col) AS (SELECT listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'DEAD'), (X'BEEF'), (X'DEAD'), (X'CAFE'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF'), regexp_count(col, X'CAFE') FROM t
+WITH t(col) AS (SELECT listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'DEAD'), (X'BEEF'), (X'DEAD'), (X'CAFE'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')), regexp_count(hex(col), hex(X'CAFE')) FROM t
 -- !query analysis
 WithCTE
 :- CTERelationDef xxxx, false
@@ -457,13 +457,13 @@ WithCTE
 :        +- Aggregate [listagg(distinct col1#x, 0x2C, col1#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1 ASC NULLS FIRST)#x]
 :           +- SubqueryAlias __auto_generated_subquery_name
 :              +- LocalRelation [col1#x]
-+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0xDEAD as string)) AS regexp_count(col, X'DEAD')#x, regexp_count(cast(col#x as string), cast(0xBEEF as string)) AS regexp_count(col, X'BEEF')#x, regexp_count(cast(col#x as string), cast(0xCAFE as string)) AS regexp_count(col, X'CAFE')#x]
++- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0xDEAD)) AS regexp_count(hex(col), hex(X'DEAD'))#x, regexp_count(hex(col#x), hex(0xBEEF)) AS regexp_count(hex(col), hex(X'BEEF'))#x, regexp_count(hex(col#x), hex(0xCAFE)) AS regexp_count(hex(col), hex(X'CAFE'))#x]
    +- SubqueryAlias t
       +- CTERelationRef xxxx, true, [col#x], false, false, 1
 
 
 -- !query
-WITH t(col) AS (SELECT listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'BB'), (X'AA'), (NULL), (X'BB'))) SELECT len(col), regexp_count(col, X'AA'), regexp_count(col, X'BB') FROM t
+WITH t(col) AS (SELECT listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'BB'), (X'AA'), (NULL), (X'BB'))) SELECT len(col), regexp_count(hex(col), hex(X'AA')), regexp_count(hex(col), hex(X'BB')) FROM t
 -- !query analysis
 WithCTE
 :- CTERelationDef xxxx, false
@@ -472,7 +472,7 @@ WithCTE
 :        +- Aggregate [listagg(distinct col1#x, 0x7C, col1#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1 ASC NULLS FIRST)#x]
 :           +- SubqueryAlias __auto_generated_subquery_name
 :              +- LocalRelation [col1#x]
-+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0xAA as string)) AS regexp_count(col, X'AA')#x, regexp_count(cast(col#x as string), cast(0xBB as string)) AS regexp_count(col, X'BB')#x]
++- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0xAA)) AS regexp_count(hex(col), hex(X'AA'))#x, regexp_count(hex(col#x), hex(0xBB)) AS regexp_count(hex(col), hex(X'BB'))#x]
    +- SubqueryAlias t
       +- CTERelationRef xxxx, true, [col#x], false, false, 1
 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/listagg-collations.sql b/sql/core/src/test/resources/sql-tests/inputs/listagg-collations.sql
@@ -12,4 +12,19 @@ WITH t(c1) AS (SELECT listagg(col1) WITHIN GROUP (ORDER BY col1 COLLATE unicode_
 
 -- Error case with collations
 SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('b'), ('A'), ('B')) AS t(c1);
-SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col)
+
+-- LISTAGG DISTINCT cast safety with collations:
+-- string -> string (safe): explicit cast to same collation
+SELECT listagg(DISTINCT CAST(col AS STRING)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col);
+-- string -> string (unsafe): cast to non-binary-equality collation on target
+SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col);
+
+-- binary -> string (safe): cast to default STRING (UTF8_BINARY)
+SELECT listagg(DISTINCT CAST(col AS STRING)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col);  -- ABC, abc, ABC
+-- binary -> string (unsafe): cast to non-binary-equality collation on target
+SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col);  -- ABC, abc, ABC
+
+-- string -> binary (safe): UTF8_BINARY source, BinaryType target
+SELECT listagg(DISTINCT CAST(col AS BINARY)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col);
+-- string -> binary (unsafe): non-binary-equality source (UTF8_LCASE), BinaryType target
+SELECT listagg(DISTINCT CAST(col AS BINARY)) WITHIN GROUP (ORDER BY col) FROM (SELECT col COLLATE UTF8_LCASE AS col FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/listagg.sql b/sql/core/src/test/resources/sql-tests/inputs/listagg.sql
@@ -24,9 +24,9 @@ WITH t(col) AS (SELECT listagg(col1, '|') WITHIN GROUP (ORDER BY col2 DESC) FROM
 SELECT listagg(col1, '|') WITHIN GROUP (ORDER BY col2 DESC) FROM df;
 SELECT listagg(col1) WITHIN GROUP (ORDER BY col2 DESC, col1 ASC) FROM df;
 SELECT listagg(col1) WITHIN GROUP (ORDER BY col2 DESC, col1 DESC) FROM df;
-WITH t(col) AS (SELECT listagg(col1) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t;
-WITH t(col) AS (SELECT listagg(col1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t;
-WITH t(col) AS (SELECT listagg(col1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'42'), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t;
+WITH t(col) AS (SELECT listagg(col1) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t;
+WITH t(col) AS (SELECT listagg(col1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t;
+WITH t(col) AS (SELECT listagg(col1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'42')), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t;
 WITH t(col1, col2) AS (SELECT listagg(col1), listagg(col2, ',') FROM df2) SELECT len(col1), regexp_count(col1, '1'), regexp_count(col1, '2'), regexp_count(col1, '3'), len(col2), regexp_count(col2, 'true'), regexp_count(col1, 'false') FROM t;
 
 -- LISTAGG with DISTINCT with implicit cast from non-string types (safe types - should succeed)
@@ -44,8 +44,8 @@ SELECT listagg(DISTINCT col, ',') WITHIN GROUP (ORDER BY col DESC) FROM VALUES (
 SELECT listagg(DISTINCT col, ',') WITHIN GROUP (ORDER BY col) FROM VALUES (1), (2), (null), (2), (3) AS t(col);
 SELECT listagg(DISTINCT col, ',') WITHIN GROUP (ORDER BY col NULLS FIRST) FROM VALUES (1), (null), (2), (null) AS t(col);
 SELECT grp, listagg(DISTINCT col) WITHIN GROUP (ORDER BY col) FROM VALUES (1, 'a'), (1, 'b'), (2, 'a'), (2, 'a'), (1, 'b') AS t(grp, col) GROUP BY grp;
-WITH t(col) AS (SELECT listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'DEAD'), (X'BEEF'), (X'DEAD'), (X'CAFE'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF'), regexp_count(col, X'CAFE') FROM t;
-WITH t(col) AS (SELECT listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'BB'), (X'AA'), (NULL), (X'BB'))) SELECT len(col), regexp_count(col, X'AA'), regexp_count(col, X'BB') FROM t;
+WITH t(col) AS (SELECT listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'DEAD'), (X'BEEF'), (X'DEAD'), (X'CAFE'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')), regexp_count(hex(col), hex(X'CAFE')) FROM t;
+WITH t(col) AS (SELECT listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'BB'), (X'AA'), (NULL), (X'BB'))) SELECT len(col), regexp_count(hex(col), hex(X'AA')), regexp_count(hex(col), hex(X'BB')) FROM t;
 SELECT grp, hex(listagg(DISTINCT col, X'2C') WITHIN GROUP (ORDER BY col)) FROM VALUES (1, X'AA'), (1, X'BB'), (1, X'AA'), (2, X'CC'), (2, X'CC') AS t(grp, col) GROUP BY grp;
 
 -- Error cases
diff --git a/sql/core/src/test/resources/sql-tests/results/listagg-collations.sql.out b/sql/core/src/test/resources/sql-tests/results/listagg-collations.sql.out
diff --git a/sql/core/src/test/resources/sql-tests/results/listagg.sql.out b/sql/core/src/test/resources/sql-tests/results/listagg.sql.out

Original file line number	Diff line number	Diff line change
`@@ -4286,7 +4286,7 @@`
`4286`	`4286`	`},`
`4287`	`4287`	`"MISMATCH_WITH_DISTINCT_INPUT_UNSAFE_CAST" : {`
`4288`	`4288`	`"message" : [`
`4289`		`- "The function <funcName> with DISTINCT requires a cast from <inputType> to <castType>, but this cast may not preserve equality semantics for the input type (e.g., floating-point -0.0 and 0.0 are treated as equal during GROUP BY but cast to different strings, leading to incorrect deduplication)."`
	`4289`	`+ "The function <funcName> with DISTINCT and WITHIN GROUP (ORDER BY) is not supported for <inputType> input. Explicitly cast the input to <castType> in both the function argument and the ORDER BY expression."`
`4290`	`4290`	`]`
`4291`	`4291`	`},`
`4292`	`4292`	`"WITHIN_GROUP_MISSING" : {`