Skip to content

Commit f5e7cec

Browse files
committed
address comments, fix tests, add tests, fix logic
1 parent 1b63b07 commit f5e7cec

File tree

10 files changed

+174
-46
lines changed

10 files changed

+174
-46
lines changed

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4286,7 +4286,7 @@
42864286
},
42874287
"MISMATCH_WITH_DISTINCT_INPUT_UNSAFE_CAST" : {
42884288
"message" : [
4289-
"The function <funcName> with DISTINCT requires a cast from <inputType> to <castType>, but this cast may not preserve equality semantics for the input type (e.g., floating-point -0.0 and 0.0 are treated as equal during GROUP BY but cast to different strings, leading to incorrect deduplication)."
4289+
"The function <funcName> with DISTINCT and WITHIN GROUP (ORDER BY) is not supported for <inputType> input. Explicitly cast the input to <castType> in both the function argument and the ORDER BY expression."
42904290
]
42914291
},
42924292
"WITHIN_GROUP_MISSING" : {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AggregateExpressionResolver.scala

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -115,25 +115,25 @@ class AggregateExpressionResolver(
115115
}
116116
}
117117

118-
private def validateResolvedAggregateExpression(aggregateExpression: AggregateExpression): Unit =
118+
private def validateResolvedAggregateExpression(
119+
aggregateExpression: AggregateExpression): Unit = {
119120
aggregateExpression match {
120121
case agg @ AggregateExpression(listAgg: ListAgg, _, _, _, _)
121122
if agg.isDistinct => listAgg.validateDistinctOrderCompatibility()
122123
case _ =>
123-
if (expressionResolutionContextStack.peek().hasAggregateExpressions) {
124-
throwNestedAggregateFunction(aggregateExpression)
125-
}
126-
127-
aggregateExpression.aggregateFunction.children.foreach { child =>
128-
if (!child.deterministic) {
129-
throwAggregateFunctionWithNondeterministicExpression(
130-
aggregateExpression,
131-
child
132-
)
133-
}
134-
}
135124
}
136125

126+
if (expressionResolutionContextStack.peek().hasAggregateExpressions) {
127+
throwNestedAggregateFunction(aggregateExpression)
128+
}
129+
130+
aggregateExpression.aggregateFunction.children.foreach { child =>
131+
if (!child.deterministic) {
132+
throwAggregateFunctionWithNondeterministicExpression(aggregateExpression, child)
133+
}
134+
}
135+
}
136+
137137
/**
138138
* If the [[AggregateExpression]] has outer references in its subtree, we need to handle it in a
139139
* special way. The whole process is explained in the [[SubqueryScope]] scaladoc, but in short

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ case class ListAgg(
632632
}
633633

634634
/**
635-
* Returns true if casting `dt` to string is injective for DISTINCT deduplication.
635+
* Returns true if casting `dt` to string/binary is injective for DISTINCT deduplication.
636636
*
637637
* @see [[validateDistinctOrderCompatibility]]
638638
*/
@@ -646,7 +646,7 @@ case class ListAgg(
646646
case _: DayTimeIntervalType => true
647647
case BooleanType => true
648648
case BinaryType => true
649-
case st: StringType if st.isUTF8BinaryCollation => true
649+
case st: StringType => st.isUTF8BinaryCollation
650650
case _: DoubleType | FloatType => false
651651
// During DST fall-back, two distinct UTC epochs can format to the same local time string
652652
// because the default format omits the timezone offset. TimestampNTZType is safe (uses UTC).

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6893,8 +6893,9 @@ object SQLConf {
68936893
buildConf("spark.sql.listagg.allowDistinctCastWithOrder.enabled")
68946894
.internal()
68956895
.doc("When true, LISTAGG(DISTINCT expr) WITHIN GROUP (ORDER BY expr) is allowed on " +
6896-
"non-string expr whose cast to string is injective. When false, DISTINCT requires " +
6897-
"expr and ORDER BY to reference the same expression with no cast.")
6896+
"non-string expr when the implicit cast to string preserves equality (e.g., integer, " +
6897+
"decimal, date). When false, the function argument and ORDER BY expression must have " +
6898+
"the exact same type, which requires explicit casts.")
68986899
.version("4.2.0")
68996900
.booleanConf
69006901
.createWithDefault(true)

sql/core/src/test/resources/sql-tests/analyzer-results/listagg-collations.sql.out

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,37 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
120120
}
121121

122122

123+
-- !query
124+
SELECT listagg(DISTINCT CAST(col AS STRING)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col)
125+
-- !query analysis
126+
Aggregate [listagg(distinct cast(col#x as string), null, col#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT CAST(col AS STRING), NULL) WITHIN GROUP (ORDER BY col ASC NULLS FIRST)#x]
127+
+- SubqueryAlias t
128+
+- LocalRelation [col#x]
129+
130+
131+
-- !query
132+
SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col)
133+
-- !query analysis
134+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
135+
{
136+
"errorClass" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT_UNSAFE_CAST",
137+
"sqlState" : "42K0K",
138+
"messageParameters" : {
139+
"castType" : "\"STRING COLLATE UTF8_LCASE\"",
140+
"funcName" : "`listagg`",
141+
"inputType" : "\"STRING\""
142+
}
143+
}
144+
145+
146+
-- !query
147+
SELECT listagg(DISTINCT CAST(col AS STRING)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col)
148+
-- !query analysis
149+
Aggregate [listagg(distinct cast(col#x as string), null, col#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT CAST(col AS STRING), NULL) WITHIN GROUP (ORDER BY col ASC NULLS FIRST)#x]
150+
+- SubqueryAlias t
151+
+- LocalRelation [col#x]
152+
153+
123154
-- !query
124155
SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col)
125156
-- !query analysis
@@ -133,3 +164,26 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
133164
"inputType" : "\"BINARY\""
134165
}
135166
}
167+
168+
169+
-- !query
170+
SELECT listagg(DISTINCT CAST(col AS BINARY)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col)
171+
-- !query analysis
172+
Aggregate [listagg(distinct cast(col#x as binary), null, col#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT CAST(col AS BINARY), NULL) WITHIN GROUP (ORDER BY col ASC NULLS FIRST)#x]
173+
+- SubqueryAlias t
174+
+- LocalRelation [col#x]
175+
176+
177+
-- !query
178+
SELECT listagg(DISTINCT CAST(col AS BINARY)) WITHIN GROUP (ORDER BY col) FROM (SELECT col COLLATE UTF8_LCASE AS col FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col))
179+
-- !query analysis
180+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
181+
{
182+
"errorClass" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT_UNSAFE_CAST",
183+
"sqlState" : "42K0K",
184+
"messageParameters" : {
185+
"castType" : "\"BINARY\"",
186+
"funcName" : "`listagg`",
187+
"inputType" : "\"STRING COLLATE UTF8_LCASE\""
188+
}
189+
}

sql/core/src/test/resources/sql-tests/analyzer-results/listagg.sql.out

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ Aggregate [listagg(col1#x, null, col2#x DESC NULLS LAST, col1#x DESC NULLS LAST,
272272

273273

274274
-- !query
275-
WITH t(col) AS (SELECT listagg(col1) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t
275+
WITH t(col) AS (SELECT listagg(col1) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t
276276
-- !query analysis
277277
WithCTE
278278
:- CTERelationDef xxxx, false
@@ -281,13 +281,13 @@ WithCTE
281281
: +- Aggregate [listagg(col1#x, null, 0, 0) AS listagg(col1, NULL)#x]
282282
: +- SubqueryAlias __auto_generated_subquery_name
283283
: +- LocalRelation [col1#x]
284-
+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0xDEAD as string)) AS regexp_count(col, X'DEAD')#x, regexp_count(cast(col#x as string), cast(0xBEEF as string)) AS regexp_count(col, X'BEEF')#x]
284+
+- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0xDEAD)) AS regexp_count(hex(col), hex(X'DEAD'))#x, regexp_count(hex(col#x), hex(0xBEEF)) AS regexp_count(hex(col), hex(X'BEEF'))#x]
285285
+- SubqueryAlias t
286286
+- CTERelationRef xxxx, true, [col#x], false, false, 1
287287

288288

289289
-- !query
290-
WITH t(col) AS (SELECT listagg(col1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t
290+
WITH t(col) AS (SELECT listagg(col1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t
291291
-- !query analysis
292292
WithCTE
293293
:- CTERelationDef xxxx, false
@@ -296,13 +296,13 @@ WithCTE
296296
: +- Aggregate [listagg(col1#x, null, 0, 0) AS listagg(col1, NULL)#x]
297297
: +- SubqueryAlias __auto_generated_subquery_name
298298
: +- LocalRelation [col1#x]
299-
+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0xDEAD as string)) AS regexp_count(col, X'DEAD')#x, regexp_count(cast(col#x as string), cast(0xBEEF as string)) AS regexp_count(col, X'BEEF')#x]
299+
+- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0xDEAD)) AS regexp_count(hex(col), hex(X'DEAD'))#x, regexp_count(hex(col#x), hex(0xBEEF)) AS regexp_count(hex(col), hex(X'BEEF'))#x]
300300
+- SubqueryAlias t
301301
+- CTERelationRef xxxx, true, [col#x], false, false, 1
302302

303303

304304
-- !query
305-
WITH t(col) AS (SELECT listagg(col1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'42'), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t
305+
WITH t(col) AS (SELECT listagg(col1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'42')), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t
306306
-- !query analysis
307307
WithCTE
308308
:- CTERelationDef xxxx, false
@@ -311,7 +311,7 @@ WithCTE
311311
: +- Aggregate [listagg(col1#x, 0x42, 0, 0) AS listagg(col1, X'42')#x]
312312
: +- SubqueryAlias __auto_generated_subquery_name
313313
: +- LocalRelation [col1#x]
314-
+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0x42 as string)) AS regexp_count(col, X'42')#x, regexp_count(cast(col#x as string), cast(0xDEAD as string)) AS regexp_count(col, X'DEAD')#x, regexp_count(cast(col#x as string), cast(0xBEEF as string)) AS regexp_count(col, X'BEEF')#x]
314+
+- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0x42)) AS regexp_count(hex(col), hex(X'42'))#x, regexp_count(hex(col#x), hex(0xDEAD)) AS regexp_count(hex(col), hex(X'DEAD'))#x, regexp_count(hex(col#x), hex(0xBEEF)) AS regexp_count(hex(col), hex(X'BEEF'))#x]
315315
+- SubqueryAlias t
316316
+- CTERelationRef xxxx, true, [col#x], false, false, 1
317317

@@ -448,7 +448,7 @@ Aggregate [grp#x], [grp#x, listagg(distinct col#x, null, col#x ASC NULLS FIRST,
448448

449449

450450
-- !query
451-
WITH t(col) AS (SELECT listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'DEAD'), (X'BEEF'), (X'DEAD'), (X'CAFE'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF'), regexp_count(col, X'CAFE') FROM t
451+
WITH t(col) AS (SELECT listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'DEAD'), (X'BEEF'), (X'DEAD'), (X'CAFE'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')), regexp_count(hex(col), hex(X'CAFE')) FROM t
452452
-- !query analysis
453453
WithCTE
454454
:- CTERelationDef xxxx, false
@@ -457,13 +457,13 @@ WithCTE
457457
: +- Aggregate [listagg(distinct col1#x, 0x2C, col1#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1 ASC NULLS FIRST)#x]
458458
: +- SubqueryAlias __auto_generated_subquery_name
459459
: +- LocalRelation [col1#x]
460-
+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0xDEAD as string)) AS regexp_count(col, X'DEAD')#x, regexp_count(cast(col#x as string), cast(0xBEEF as string)) AS regexp_count(col, X'BEEF')#x, regexp_count(cast(col#x as string), cast(0xCAFE as string)) AS regexp_count(col, X'CAFE')#x]
460+
+- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0xDEAD)) AS regexp_count(hex(col), hex(X'DEAD'))#x, regexp_count(hex(col#x), hex(0xBEEF)) AS regexp_count(hex(col), hex(X'BEEF'))#x, regexp_count(hex(col#x), hex(0xCAFE)) AS regexp_count(hex(col), hex(X'CAFE'))#x]
461461
+- SubqueryAlias t
462462
+- CTERelationRef xxxx, true, [col#x], false, false, 1
463463

464464

465465
-- !query
466-
WITH t(col) AS (SELECT listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'BB'), (X'AA'), (NULL), (X'BB'))) SELECT len(col), regexp_count(col, X'AA'), regexp_count(col, X'BB') FROM t
466+
WITH t(col) AS (SELECT listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'BB'), (X'AA'), (NULL), (X'BB'))) SELECT len(col), regexp_count(hex(col), hex(X'AA')), regexp_count(hex(col), hex(X'BB')) FROM t
467467
-- !query analysis
468468
WithCTE
469469
:- CTERelationDef xxxx, false
@@ -472,7 +472,7 @@ WithCTE
472472
: +- Aggregate [listagg(distinct col1#x, 0x7C, col1#x ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1 ASC NULLS FIRST)#x]
473473
: +- SubqueryAlias __auto_generated_subquery_name
474474
: +- LocalRelation [col1#x]
475-
+- Project [len(col#x) AS len(col)#x, regexp_count(cast(col#x as string), cast(0xAA as string)) AS regexp_count(col, X'AA')#x, regexp_count(cast(col#x as string), cast(0xBB as string)) AS regexp_count(col, X'BB')#x]
475+
+- Project [len(col#x) AS len(col)#x, regexp_count(hex(col#x), hex(0xAA)) AS regexp_count(hex(col), hex(X'AA'))#x, regexp_count(hex(col#x), hex(0xBB)) AS regexp_count(hex(col), hex(X'BB'))#x]
476476
+- SubqueryAlias t
477477
+- CTERelationRef xxxx, true, [col#x], false, false, 1
478478

sql/core/src/test/resources/sql-tests/inputs/listagg-collations.sql

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,19 @@ WITH t(c1) AS (SELECT listagg(col1) WITHIN GROUP (ORDER BY col1 COLLATE unicode_
1212

1313
-- Error case with collations
1414
SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('b'), ('A'), ('B')) AS t(c1);
15-
SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col)
15+
16+
-- LISTAGG DISTINCT cast safety with collations:
17+
-- string -> string (safe): explicit cast to same collation
18+
SELECT listagg(DISTINCT CAST(col AS STRING)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col);
19+
-- string -> string (unsafe): cast to non-binary-equality collation on target
20+
SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col);
21+
22+
-- binary -> string (safe): cast to default STRING (UTF8_BINARY)
23+
SELECT listagg(DISTINCT CAST(col AS STRING)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col); -- ABC, abc, ABC
24+
-- binary -> string (unsafe): cast to non-binary-equality collation on target
25+
SELECT listagg(DISTINCT CAST(col AS STRING COLLATE UTF8_LCASE)) WITHIN GROUP (ORDER BY col) FROM VALUES (X'414243'), (X'616263'), (X'414243') AS t(col); -- ABC, abc, ABC
26+
27+
-- string -> binary (safe): UTF8_BINARY source, BinaryType target
28+
SELECT listagg(DISTINCT CAST(col AS BINARY)) WITHIN GROUP (ORDER BY col) FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col);
29+
-- string -> binary (unsafe): non-binary-equality source (UTF8_LCASE), BinaryType target
30+
SELECT listagg(DISTINCT CAST(col AS BINARY)) WITHIN GROUP (ORDER BY col) FROM (SELECT col COLLATE UTF8_LCASE AS col FROM VALUES ('ABC'), ('abc'), ('ABC') AS t(col))

sql/core/src/test/resources/sql-tests/inputs/listagg.sql

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ WITH t(col) AS (SELECT listagg(col1, '|') WITHIN GROUP (ORDER BY col2 DESC) FROM
2424
SELECT listagg(col1, '|') WITHIN GROUP (ORDER BY col2 DESC) FROM df;
2525
SELECT listagg(col1) WITHIN GROUP (ORDER BY col2 DESC, col1 ASC) FROM df;
2626
SELECT listagg(col1) WITHIN GROUP (ORDER BY col2 DESC, col1 DESC) FROM df;
27-
WITH t(col) AS (SELECT listagg(col1) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t;
28-
WITH t(col) AS (SELECT listagg(col1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t;
29-
WITH t(col) AS (SELECT listagg(col1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(col, X'42'), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF') FROM t;
27+
WITH t(col) AS (SELECT listagg(col1) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t;
28+
WITH t(col) AS (SELECT listagg(col1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t;
29+
WITH t(col) AS (SELECT listagg(col1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF'))) SELECT len(col), regexp_count(hex(col), hex(X'42')), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')) FROM t;
3030
WITH t(col1, col2) AS (SELECT listagg(col1), listagg(col2, ',') FROM df2) SELECT len(col1), regexp_count(col1, '1'), regexp_count(col1, '2'), regexp_count(col1, '3'), len(col2), regexp_count(col2, 'true'), regexp_count(col1, 'false') FROM t;
3131

3232
-- LISTAGG with DISTINCT with implicit cast from non-string types (safe types - should succeed)
@@ -44,8 +44,8 @@ SELECT listagg(DISTINCT col, ',') WITHIN GROUP (ORDER BY col DESC) FROM VALUES (
4444
SELECT listagg(DISTINCT col, ',') WITHIN GROUP (ORDER BY col) FROM VALUES (1), (2), (null), (2), (3) AS t(col);
4545
SELECT listagg(DISTINCT col, ',') WITHIN GROUP (ORDER BY col NULLS FIRST) FROM VALUES (1), (null), (2), (null) AS t(col);
4646
SELECT grp, listagg(DISTINCT col) WITHIN GROUP (ORDER BY col) FROM VALUES (1, 'a'), (1, 'b'), (2, 'a'), (2, 'a'), (1, 'b') AS t(grp, col) GROUP BY grp;
47-
WITH t(col) AS (SELECT listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'DEAD'), (X'BEEF'), (X'DEAD'), (X'CAFE'))) SELECT len(col), regexp_count(col, X'DEAD'), regexp_count(col, X'BEEF'), regexp_count(col, X'CAFE') FROM t;
48-
WITH t(col) AS (SELECT listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'BB'), (X'AA'), (NULL), (X'BB'))) SELECT len(col), regexp_count(col, X'AA'), regexp_count(col, X'BB') FROM t;
47+
WITH t(col) AS (SELECT listagg(DISTINCT col1, X'2C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'DEAD'), (X'BEEF'), (X'DEAD'), (X'CAFE'))) SELECT len(col), regexp_count(hex(col), hex(X'DEAD')), regexp_count(hex(col), hex(X'BEEF')), regexp_count(hex(col), hex(X'CAFE')) FROM t;
48+
WITH t(col) AS (SELECT listagg(DISTINCT col1, X'7C') WITHIN GROUP (ORDER BY col1) FROM (VALUES (X'BB'), (X'AA'), (NULL), (X'BB'))) SELECT len(col), regexp_count(hex(col), hex(X'AA')), regexp_count(hex(col), hex(X'BB')) FROM t;
4949
SELECT grp, hex(listagg(DISTINCT col, X'2C') WITHIN GROUP (ORDER BY col)) FROM VALUES (1, X'AA'), (1, X'BB'), (1, X'AA'), (2, X'CC'), (2, X'CC') AS t(grp, col) GROUP BY grp;
5050

5151
-- Error cases

0 commit comments

Comments
 (0)