Skip to content

Commit 13e5504

Browse files
jovanpavl-db authored and MaxGekk committed
[SPARK-49874][SQL] Remove trim and ltrim collation specifiers
### What changes were proposed in this pull request? Removing collation trim and ltrim support. ### Why are the changes needed? Trim and Ltrim collations won't be initially supported. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Changed existing tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#48349 from jovanpavl-db/remove-ltrim-trim-collations. Authored-by: Jovan Pavlovic <[email protected]> Signed-off-by: Max Gekk <[email protected]>
1 parent f694ea8 commit 13e5504

File tree

7 files changed

+61
-77
lines changed

7 files changed

+61
-77
lines changed

common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java

Lines changed: 28 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,8 @@ public Collation(
200200
* bit 29: 0 for UTF8_BINARY, 1 for ICU collations.
201201
* bit 28-24: Reserved.
202202
* bit 23-22: Reserved for version.
203-
* bit 21-18: Reserved for space trimming.
204-
* 0000 = none, 0001 = left trim, 0010 = right trim, 0011 = trim.
203+
* bit 21-19: Zeroes, reserved for future trimmings.
204+
* bit 18: 0 = none, 1 = right trim.
205205
* bit 17-0: Depend on collation family.
206206
* ---
207207
* INDETERMINATE collation ID binary layout:
@@ -216,8 +216,8 @@ public Collation(
216216
* UTF8_BINARY collation ID binary layout:
217217
* bit 31-24: Zeroes.
218218
* bit 23-22: Zeroes, reserved for version.
219-
* bit 21-18: Reserved for space trimming.
220-
* 0000 = none, 0001 = left trim, 0010 = right trim, 0011 = trim.
219+
* bit 21-19: Zeroes, reserved for future trimmings.
220+
* bit 18: 0 = none, 1 = right trim.
221221
* bit 17-3: Zeroes.
222222
* bit 2: 0, reserved for accent sensitivity.
223223
* bit 1: 0, reserved for uppercase and case-insensitive.
@@ -229,28 +229,28 @@ public Collation(
229229
* bit 28-24: Zeroes.
230230
* bit 23-22: Zeroes, reserved for version.
231231
* bit 21-18: Reserved for space trimming.
232-
* 0000 = none, 0001 = left trim, 0010 = right trim, 0011 = trim.
232+
* 0000 = none, 0001 = right trim. Bits 21-19 remain reserved and fixed to 0.
233233
* bit 17: 0 = case-sensitive, 1 = case-insensitive.
234234
* bit 16: 0 = accent-sensitive, 1 = accent-insensitive.
235235
* bit 15-14: Zeroes, reserved for punctuation sensitivity.
236236
* bit 13-12: Zeroes, reserved for first letter preference.
237237
* bit 11-0: Locale ID as specified in `ICULocaleToId` mapping.
238238
* ---
239239
* Some illustrative examples of collation name to ID mapping:
240-
* - UTF8_BINARY -> 0
241-
* - UTF8_LCASE -> 1
242-
* - UNICODE -> 0x20000000
243-
* - UNICODE_AI -> 0x20010000
244-
* - UNICODE_CI -> 0x20020000
245-
* - UNICODE_LTRIM -> 0x20040000
246-
* - UNICODE_RTRIM -> 0x20080000
247-
* - UNICODE_TRIM -> 0x200C0000
248-
* - UNICODE_CI_AI -> 0x20030000
249-
* - UNICODE_CI_TRIM -> 0x200E0000
250-
* - UNICODE_AI_TRIM -> 0x200D0000
251-
* - UNICODE_CI_AI_TRIM-> 0x200F0000
252-
* - af -> 0x20000001
253-
* - af_CI_AI -> 0x20030001
240+
* - UTF8_BINARY -> 0
241+
* - UTF8_BINARY_RTRIM -> 0x00040000
242+
* - UTF8_LCASE -> 1
243+
* - UTF8_LCASE_RTRIM -> 0x00040001
244+
* - UNICODE -> 0x20000000
245+
* - UNICODE_AI -> 0x20010000
246+
* - UNICODE_CI -> 0x20020000
247+
* - UNICODE_RTRIM -> 0x20040000
248+
* - UNICODE_CI_AI -> 0x20030000
249+
* - UNICODE_CI_RTRIM -> 0x20060000
250+
* - UNICODE_AI_RTRIM -> 0x20050000
251+
* - UNICODE_CI_AI_RTRIM-> 0x20070000
252+
* - af -> 0x20000001
253+
* - af_CI_AI -> 0x20030001
254254
*/
255255
private abstract static class CollationSpec {
256256

@@ -270,12 +270,11 @@ protected enum ImplementationProvider {
270270
}
271271

272272
/**
273-
* Bits 19-18 having value 00 for no space trimming, 01 for left space trimming
274-
* 10 for right space trimming and 11 for both sides space trimming. Bits 21, 20
275-
* remained reserved (and fixed to 0) for future use.
273+
* Bit 18 of the collation ID is 0 for no space trimming and 1 for right trimming.
274+
* Bits 21, 20 and 19 remain reserved (and fixed to 0) for future use.
276275
*/
277276
protected enum SpaceTrimming {
278-
NONE, LTRIM, RTRIM, TRIM
277+
NONE, RTRIM
279278
}
280279

281280
/**
@@ -307,7 +306,7 @@ protected enum SpaceTrimming {
307306
/**
308307
* Bitmask corresponding to width in bits in binary collation ID layout.
309308
*/
310-
protected static final int SPACE_TRIMMING_MASK = 0b11;
309+
protected static final int SPACE_TRIMMING_MASK = 0b1;
311310

312311
private static final int INDETERMINATE_COLLATION_ID = -1;
313312

@@ -349,12 +348,10 @@ protected static UTF8String applyTrimmingPolicy(UTF8String s, int collationId) {
349348
* Utility function to trim spaces when collation uses space trimming.
350349
*/
351350
protected static UTF8String applyTrimmingPolicy(UTF8String s, SpaceTrimming spaceTrimming) {
352-
return switch (spaceTrimming) {
353-
case LTRIM -> s.trimLeft();
354-
case RTRIM -> s.trimRight();
355-
case TRIM -> s.trim();
356-
default -> s; // NOTRIM
357-
};
351+
if(spaceTrimming == SpaceTrimming.RTRIM){
352+
return s.trimRight();
353+
}
354+
return s; // No trimming.
358355
}
359356

360357
/**
@@ -505,12 +502,8 @@ private static int collationNameToId(String originalName, String collationName)
505502

506503
SpaceTrimming spaceTrimming = SpaceTrimming.NONE;
507504
String remainingSpec = remainingSpecifiers.substring(1);
508-
if (remainingSpec.equals("LTRIM")) {
509-
spaceTrimming = SpaceTrimming.LTRIM;
510-
} else if (remainingSpec.equals("RTRIM")) {
505+
if (remainingSpec.equals("RTRIM")) {
511506
spaceTrimming = SpaceTrimming.RTRIM;
512-
} else if(remainingSpec.equals("TRIM")) {
513-
spaceTrimming = SpaceTrimming.TRIM;
514507
} else {
515508
throw collationInvalidNameException(originalName);
516509
}
@@ -851,9 +844,7 @@ private static int collationNameToId(
851844
accentSensitivity = AccentSensitivity.valueOf(specifier);
852845
isAccentSpecifierSet = true;
853846
break;
854-
case "LTRIM":
855847
case "RTRIM":
856-
case "TRIM":
857848
if (isSpaceTrimmingSpecifierSet) {
858849
throw collationInvalidNameException(originalName);
859850
}

common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
369369
1 << 15, // UTF8_BINARY mandatory zero bit 15 breach.
370370
1 << 16, // UTF8_BINARY mandatory zero bit 16 breach.
371371
1 << 17, // UTF8_BINARY mandatory zero bit 17 breach.
372+
1 << 19, // UTF8_BINARY mandatory zero bit 19 breach.
372373
1 << 20, // UTF8_BINARY mandatory zero bit 20 breach.
373374
1 << 21, // UTF8_BINARY mandatory zero bit 21 breach.
374375
1 << 23, // UTF8_BINARY mandatory zero bit 23 breach.
@@ -381,6 +382,7 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
381382
(1 << 29) | (1 << 13), // ICU mandatory zero bit 13 breach.
382383
(1 << 29) | (1 << 14), // ICU mandatory zero bit 14 breach.
383384
(1 << 29) | (1 << 15), // ICU mandatory zero bit 15 breach.
385+
(1 << 29) | (1 << 19), // ICU mandatory zero bit 19 breach.
384386
(1 << 29) | (1 << 20), // ICU mandatory zero bit 20 breach.
385387
(1 << 29) | (1 << 21), // ICU mandatory zero bit 21 breach.
386388
(1 << 29) | (1 << 22), // ICU mandatory zero bit 22 breach.

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -764,8 +764,7 @@ object SQLConf {
764764
.internal()
765765
.doc(
766766
"Trim collation feature is under development and its use should be done under this" +
767-
"feature flag. Trim collation trims leading, trailing or both spaces depending of" +
768-
"specifier (LTRIM, RTRIM, TRIM)."
767+
"feature flag. Trim collation trims trailing whitespaces from strings."
769768
)
770769
.version("4.0.0")
771770
.booleanConf

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -169,28 +169,20 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
169169
("", "UTF8_BINARY", UTF8String.fromString("").getBytes),
170170
("aa", "UTF8_BINARY", UTF8String.fromString("aa").getBytes),
171171
("AA", "UTF8_BINARY", UTF8String.fromString("AA").getBytes),
172-
(" AA ", "UTF8_BINARY_TRIM", UTF8String.fromString("AA").getBytes),
173-
(" AA ", "UTF8_BINARY_LTRIM", UTF8String.fromString("AA ").getBytes),
174172
(" AA ", "UTF8_BINARY_RTRIM", UTF8String.fromString(" AA").getBytes),
175173
("aA", "UTF8_BINARY", UTF8String.fromString("aA").getBytes),
176174
("", "UTF8_LCASE", UTF8String.fromString("").getBytes),
177175
("aa", "UTF8_LCASE", UTF8String.fromString("aa").getBytes),
178176
("AA", "UTF8_LCASE", UTF8String.fromString("aa").getBytes),
179-
(" AA ", "UTF8_LCASE_TRIM", UTF8String.fromString("aa").getBytes),
180-
(" AA ", "UTF8_LCASE_LTRIM", UTF8String.fromString("aa ").getBytes),
181177
(" AA ", "UTF8_LCASE_RTRIM", UTF8String.fromString(" aa").getBytes),
182178
("aA", "UTF8_LCASE", UTF8String.fromString("aa").getBytes),
183179
("", "UNICODE", Array[Byte](1, 1, 0)),
184180
("aa", "UNICODE", Array[Byte](42, 42, 1, 6, 1, 6, 0)),
185181
("AA", "UNICODE", Array[Byte](42, 42, 1, 6, 1, -36, -36, 0)),
186182
("aA", "UNICODE", Array[Byte](42, 42, 1, 6, 1, -59, -36, 0)),
187-
(" aa ", "UNICODE_TRIM", Array[Byte](42, 42, 1, 6, 1, 6, 0)),
188-
(" aa", "UNICODE_LTRIM", Array[Byte](42, 42, 1, 6, 1, 6, 0)),
189183
("aa ", "UNICODE_RTRIM", Array[Byte](42, 42, 1, 6, 1, 6, 0)),
190184
("", "UNICODE_CI", Array[Byte](1, 0)),
191185
("aa", "UNICODE_CI", Array[Byte](42, 42, 1, 6, 0)),
192-
(" aa ", "UNICODE_CI_TRIM", Array[Byte](42, 42, 1, 6, 0)),
193-
(" aa", "UNICODE_CI_LTRIM", Array[Byte](42, 42, 1, 6, 0)),
194186
("aa ", "UNICODE_CI_RTRIM", Array[Byte](42, 42, 1, 6, 0)),
195187
("AA", "UNICODE_CI", Array[Byte](42, 42, 1, 6, 0)),
196188
("aA", "UNICODE_CI", Array[Byte](42, 42, 1, 6, 0))

sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
4949
"utf8_lcase",
5050
"unicode",
5151
"unicode_ci",
52-
"unicode_ltrim_ci",
53-
"utf8_lcase_trim",
52+
"unicode_rtrim_ci",
53+
"utf8_lcase_rtrim",
5454
"utf8_binary_rtrim"
5555
).foreach { collationName =>
5656
checkAnswer(sql(s"select 'aaa' collate $collationName"), Row("aaa"))
@@ -68,8 +68,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
6868
"utf8_lcase",
6969
"uNicOde",
7070
"UNICODE_ci",
71-
"uNiCoDE_ltRIm_cI",
72-
"UtF8_lCaSE_tRIM",
71+
"uNiCoDE_rtRIm_cI",
72+
"UtF8_lCaSE_rtRIM",
7373
"utf8_biNAry_RtRiM"
7474
).foreach { collationName =>
7575
checkAnswer(sql(s"select 'aaa' collate $collationName"), Row("aaa"))
@@ -87,8 +87,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
8787
"utf8_lcase",
8888
"unicode",
8989
"unicode_ci",
90-
"unicode_ci_ltrim",
91-
"utf8_lcase_trim",
90+
"unicode_ci_rtrim",
91+
"utf8_lcase_rtrim",
9292
"utf8_binary_rtrim"
9393
).foreach { collationName =>
9494
checkAnswer(
@@ -113,8 +113,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
113113
)
114114
assert(sql(s"select collate('aaa', 'UNICODE')").schema(0).dataType == StringType("UNICODE"))
115115
assert(
116-
sql(s"select collate('aaa', 'UNICODE_TRIM')").schema(0).dataType ==
117-
StringType("UNICODE_TRIM")
116+
sql(s"select collate('aaa', 'UNICODE_RTRIM')").schema(0).dataType ==
117+
StringType("UNICODE_RTRIM")
118118
)
119119
}
120120
}
@@ -1102,7 +1102,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
11021102
}
11031103

11041104
for (collation <- Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI",
1105-
"UNICODE_CI_TRIM", "")) {
1105+
"UNICODE_CI_RTRIM", "")) {
11061106
for (codeGen <- Seq("NO_CODEGEN", "CODEGEN_ONLY")) {
11071107
val collationSetup = if (collation.isEmpty) "" else " COLLATE " + collation
11081108
val supportsBinaryEquality = collation.isEmpty || collation == "UNICODE" ||
@@ -1301,7 +1301,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
13011301
HashJoinTestCase("UTF8_LCASE", "aa", "AA", Seq(Row("aa", 1, "AA", 2), Row("aa", 1, "aa", 2))),
13021302
HashJoinTestCase("UNICODE", "aa", "AA", Seq(Row("aa", 1, "aa", 2))),
13031303
HashJoinTestCase("UNICODE_CI", "aa", "AA", Seq(Row("aa", 1, "AA", 2), Row("aa", 1, "aa", 2))),
1304-
HashJoinTestCase("UNICODE_CI_TRIM", "aa", " AA ", Seq(Row("aa", 1, " AA ", 2),
1304+
HashJoinTestCase("UNICODE_CI_RTRIM", "aa", "AA ", Seq(Row("aa", 1, "AA ", 2),
13051305
Row("aa", 1, "aa", 2)))
13061306
)
13071307

@@ -1358,8 +1358,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
13581358
Seq(Row(Seq("aa"), 1, Seq("aa"), 2))),
13591359
HashJoinTestCase("UNICODE_CI", "aa", "AA",
13601360
Seq(Row(Seq("aa"), 1, Seq("AA"), 2), Row(Seq("aa"), 1, Seq("aa"), 2))),
1361-
HashJoinTestCase("UNICODE_CI_TRIM", "aa", " AA ",
1362-
Seq(Row(Seq("aa"), 1, Seq(" AA "), 2), Row(Seq("aa"), 1, Seq("aa"), 2)))
1361+
HashJoinTestCase("UNICODE_CI_RTRIM", "aa", "AA ",
1362+
Seq(Row(Seq("aa"), 1, Seq("AA "), 2), Row(Seq("aa"), 1, Seq("aa"), 2)))
13631363
)
13641364

13651365
testCases.foreach(t => {
@@ -1416,8 +1416,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
14161416
Seq(Row(Seq(Seq("aa")), 1, Seq(Seq("aa")), 2))),
14171417
HashJoinTestCase("UNICODE_CI", "aa", "AA",
14181418
Seq(Row(Seq(Seq("aa")), 1, Seq(Seq("AA")), 2), Row(Seq(Seq("aa")), 1, Seq(Seq("aa")), 2))),
1419-
HashJoinTestCase("UNICODE_CI_TRIM", "aa", " AA ",
1420-
Seq(Row(Seq(Seq("aa")), 1, Seq(Seq(" AA ")), 2), Row(Seq(Seq("aa")), 1, Seq(Seq("aa")), 2)))
1419+
HashJoinTestCase("UNICODE_CI_RTRIM", "aa", "AA ",
1420+
Seq(Row(Seq(Seq("aa")), 1, Seq(Seq("AA ")), 2), Row(Seq(Seq("aa")), 1, Seq(Seq("aa")), 2)))
14211421
)
14221422

14231423
testCases.foreach(t => {
@@ -1478,8 +1478,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
14781478
Seq(Row(Row("aa"), 1, Row("aa"), 2))),
14791479
HashJoinTestCase("UNICODE_CI", "aa", "AA",
14801480
Seq(Row(Row("aa"), 1, Row("AA"), 2), Row(Row("aa"), 1, Row("aa"), 2))),
1481-
HashJoinTestCase("UNICODE_CI_TRIM", "aa", " AA ",
1482-
Seq(Row(Row("aa"), 1, Row(" AA "), 2), Row(Row("aa"), 1, Row("aa"), 2)))
1481+
HashJoinTestCase("UNICODE_CI_RTRIM", "aa", "AA ",
1482+
Seq(Row(Row("aa"), 1, Row("AA "), 2), Row(Row("aa"), 1, Row("aa"), 2)))
14831483
)
14841484
testCases.foreach(t => {
14851485
withTable(t1, t2) {
@@ -1533,8 +1533,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
15331533
HashJoinTestCase("UNICODE_CI", "aa", "AA",
15341534
Seq(Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("AA"))), 2),
15351535
Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("aa"))), 2))),
1536-
HashJoinTestCase("UNICODE_CI_TRIM", "aa", " AA ",
1537-
Seq(Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row(" AA "))), 2),
1536+
HashJoinTestCase("UNICODE_CI_RTRIM", "aa", "AA ",
1537+
Seq(Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("AA "))), 2),
15381538
Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("aa"))), 2)))
15391539
)
15401540
testCases.foreach(t => {
@@ -1629,8 +1629,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
16291629
"'a', 'a', 1", "'a', 'A', 1", Row("a", "a", 1, "a", "A", 1)),
16301630
HashMultiJoinTestCase("STRING COLLATE UTF8_LCASE", "STRING COLLATE UNICODE_CI",
16311631
"'a', 'a', 1", "'A', 'A', 1", Row("a", "a", 1, "A", "A", 1)),
1632-
HashMultiJoinTestCase("STRING COLLATE UTF8_LCASE", "STRING COLLATE UNICODE_CI_TRIM",
1633-
"'a', 'a', 1", "'A', ' A ', 1", Row("a", "a", 1, "A", " A ", 1))
1632+
HashMultiJoinTestCase("STRING COLLATE UTF8_LCASE", "STRING COLLATE UNICODE_CI_RTRIM",
1633+
"'a', 'a', 1", "'A', 'A ', 1", Row("a", "a", 1, "A", "A ", 1))
16341634
)
16351635

16361636
testCases.foreach(t => {
@@ -1664,18 +1664,18 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
16641664
case class HllSketchAggTestCase[R](c: String, result: R)
16651665
val testCases = Seq(
16661666
HllSketchAggTestCase("UTF8_BINARY", 5),
1667-
HllSketchAggTestCase("UTF8_BINARY_TRIM", 4),
1667+
HllSketchAggTestCase("UTF8_BINARY_RTRIM", 4),
16681668
HllSketchAggTestCase("UTF8_LCASE", 4),
1669-
HllSketchAggTestCase("UTF8_LCASE_TRIM", 3),
1669+
HllSketchAggTestCase("UTF8_LCASE_RTRIM", 3),
16701670
HllSketchAggTestCase("UNICODE", 5),
1671-
HllSketchAggTestCase("UNICODE_TRIM", 4),
1671+
HllSketchAggTestCase("UNICODE_RTRIM", 4),
16721672
HllSketchAggTestCase("UNICODE_CI", 4),
1673-
HllSketchAggTestCase("UNICODE_CI_TRIM", 3)
1673+
HllSketchAggTestCase("UNICODE_CI_RTRIM", 3)
16741674
)
16751675
testCases.foreach(t => {
16761676
withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.c) {
16771677
val q = "SELECT hll_sketch_estimate(hll_sketch_agg(col)) FROM " +
1678-
"VALUES ('a'), ('A'), ('b'), ('b'), ('c'), (' c ') tab(col)"
1678+
"VALUES ('a'), ('A'), ('b'), ('b'), ('c'), ('c ') tab(col)"
16791679
val df = sql(q)
16801680
checkAnswer(df, Seq(Row(t.result)))
16811681
}

sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -871,8 +871,8 @@ class QueryCompilationErrorsSuite
871871
test("SPARK-49666: the trim collation feature is off without collate builder call") {
872872
withSQLConf(SQLConf.TRIM_COLLATION_ENABLED.key -> "false") {
873873
Seq(
874-
"CREATE TABLE t(col STRING COLLATE EN_TRIM_CI) USING parquet",
875-
"CREATE TABLE t(col STRING COLLATE UTF8_LCASE_TRIM) USING parquet",
874+
"CREATE TABLE t(col STRING COLLATE EN_RTRIM_CI) USING parquet",
875+
"CREATE TABLE t(col STRING COLLATE UTF8_LCASE_RTRIM) USING parquet",
876876
"SELECT 'aaa' COLLATE UNICODE_LTRIM_CI"
877877
).foreach { sqlText =>
878878
checkError(
@@ -886,8 +886,8 @@ class QueryCompilationErrorsSuite
886886
test("SPARK-49666: the trim collation feature is off with collate builder call") {
887887
withSQLConf(SQLConf.TRIM_COLLATION_ENABLED.key -> "false") {
888888
Seq(
889-
"SELECT collate('aaa', 'UNICODE_TRIM')",
890-
"SELECT collate('aaa', 'UTF8_BINARY_TRIM')",
889+
"SELECT collate('aaa', 'UNICODE_RTRIM')",
890+
"SELECT collate('aaa', 'UTF8_BINARY_RTRIM')",
891891
"SELECT collate('aaa', 'EN_AI_RTRIM')"
892892
).foreach { sqlText =>
893893
checkError(

sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession {
520520

521521
withSQLConf(SQLConf.TRIM_COLLATION_ENABLED.key -> "false") {
522522
checkError(
523-
exception = intercept[AnalysisException](sql(s"SET COLLATION UNICODE_CI_TRIM")),
523+
exception = intercept[AnalysisException](sql(s"SET COLLATION UNICODE_CI_RTRIM")),
524524
condition = "UNSUPPORTED_FEATURE.TRIM_COLLATION"
525525
)
526526
}

0 commit comments

Comments
 (0)