Skip to content

Commit fe5145e

Browse files
wangyumdongjoon-hyun
authored andcommitted
[SPARK-28109][SQL] Fix TRIM(type trimStr FROM str) returns incorrect value
## What changes were proposed in this pull request? [SPARK-28093](https://issues.apache.org/jira/browse/SPARK-28093) fixed `TRIM/LTRIM/RTRIM('str', 'trimStr')` returns an incorrect value, but that fix introduced a new bug, `TRIM(type trimStr FROM str)` returns an incorrect value. This pr fix this issue. ## How was this patch tested? unit tests and manual tests: Before this PR: ```sql spark-sql> SELECT trim('yxTomxx', 'xyz'), trim(BOTH 'xyz' FROM 'yxTomxx'); Tom z spark-sql> SELECT trim('xxxbarxxx', 'x'), trim(BOTH 'x' FROM 'xxxbarxxx'); bar spark-sql> SELECT ltrim('zzzytest', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytest'); test xyz spark-sql> SELECT ltrim('zzzytestxyz', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytestxyz'); testxyz spark-sql> SELECT ltrim('xyxXxyLAST WORD', 'xy'), trim(LEADING 'xy' FROM 'xyxXxyLAST WORD'); XxyLAST WORD spark-sql> SELECT rtrim('testxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'testxxzx'); test xy spark-sql> SELECT rtrim('xyztestxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'xyztestxxzx'); xyztest spark-sql> SELECT rtrim('TURNERyxXxy', 'xy'), trim(TRAILING 'xy' FROM 'TURNERyxXxy'); TURNERyxX ``` After this PR: ```sql spark-sql> SELECT trim('yxTomxx', 'xyz'), trim(BOTH 'xyz' FROM 'yxTomxx'); Tom Tom spark-sql> SELECT trim('xxxbarxxx', 'x'), trim(BOTH 'x' FROM 'xxxbarxxx'); bar bar spark-sql> SELECT ltrim('zzzytest', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytest'); test test spark-sql> SELECT ltrim('zzzytestxyz', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytestxyz'); testxyz testxyz spark-sql> SELECT ltrim('xyxXxyLAST WORD', 'xy'), trim(LEADING 'xy' FROM 'xyxXxyLAST WORD'); XxyLAST WORD XxyLAST WORD spark-sql> SELECT rtrim('testxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'testxxzx'); test test spark-sql> SELECT rtrim('xyztestxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'xyztestxxzx'); xyztest xyztest spark-sql> SELECT rtrim('TURNERyxXxy', 'xy'), trim(TRAILING 'xy' FROM 'TURNERyxXxy'); TURNERyxX TURNERyxX ``` And PostgreSQL: ```sql postgres=# SELECT trim('yxTomxx', 'xyz'), trim(BOTH 'xyz' FROM 'yxTomxx'); btrim | btrim -------+------- Tom | Tom (1 row) postgres=# SELECT trim('xxxbarxxx', 'x'), trim(BOTH 'x' FROM 'xxxbarxxx'); btrim | btrim -------+------- bar | bar (1 row) postgres=# SELECT ltrim('zzzytest', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytest'); ltrim | ltrim -------+------- test | test (1 row) postgres=# SELECT ltrim('zzzytestxyz', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytestxyz'); ltrim | ltrim ---------+--------- testxyz | testxyz (1 row) postgres=# SELECT ltrim('xyxXxyLAST WORD', 'xy'), trim(LEADING 'xy' FROM 'xyxXxyLAST WORD'); ltrim | ltrim --------------+-------------- XxyLAST WORD | XxyLAST WORD (1 row) postgres=# SELECT rtrim('testxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'testxxzx'); rtrim | rtrim -------+------- test | test (1 row) postgres=# SELECT rtrim('xyztestxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'xyztestxxzx'); rtrim | rtrim ---------+--------- xyztest | xyztest (1 row) postgres=# SELECT rtrim('TURNERyxXxy', 'xy'), trim(TRAILING 'xy' FROM 'TURNERyxXxy'); rtrim | rtrim -----------+----------- TURNERyxX | TURNERyxX (1 row) ``` Closes apache#24911 from wangyum/SPARK-28109. Authored-by: Yuming Wang <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 630dfdf commit fe5145e

File tree

7 files changed

+82
-73
lines changed

7 files changed

+82
-73
lines changed

docs/sql-keywords.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ Below is a list of all the keywords in Spark SQL.
263263
<tr><td>TRANSACTION</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
264264
<tr><td>TRANSACTIONS</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
265265
<tr><td>TRANSFORM</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
266+
<tr><td>TRIM</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
266267
<tr><td>TRUE</td><td>non-reserved</td><td>non-reserved</td><td>reserved</td></tr>
267268
<tr><td>TRUNCATE</td><td>non-reserved</td><td>non-reserved</td><td>reserved</td></tr>
268269
<tr><td>UNARCHIVE</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -694,8 +694,6 @@ primaryExpression
694694
| '(' query ')' #subqueryExpression
695695
| qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
696696
(OVER windowSpec)? #functionCall
697-
| qualifiedName '(' trimOption=(BOTH | LEADING | TRAILING) (argument+=expression)?
698-
FROM argument+=expression ')' #functionCall
699697
| IDENTIFIER '->' expression #lambda
700698
| '(' IDENTIFIER (',' IDENTIFIER)+ ')' '->' expression #lambda
701699
| value=primaryExpression '[' index=valueExpression ']' #subscript
@@ -705,6 +703,8 @@ primaryExpression
705703
| EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract
706704
| (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') pos=valueExpression
707705
((FOR | ',') len=valueExpression)? ')' #substring
706+
| TRIM '(' trimOption=(BOTH | LEADING | TRAILING) (trimStr=valueExpression)?
707+
FROM srcStr=valueExpression ')' #trim
708708
;
709709

710710
constant
@@ -1059,6 +1059,7 @@ ansiNonReserved
10591059
| TRANSACTION
10601060
| TRANSACTIONS
10611061
| TRANSFORM
1062+
| TRIM
10621063
| TRUE
10631064
| TRUNCATE
10641065
| UNARCHIVE
@@ -1319,6 +1320,7 @@ nonReserved
13191320
| TRANSACTION
13201321
| TRANSACTIONS
13211322
| TRANSFORM
1323+
| TRIM
13221324
| TRUE
13231325
| TRUNCATE
13241326
| TYPE
@@ -1577,6 +1579,7 @@ TRAILING: 'TRAILING';
15771579
TRANSACTION: 'TRANSACTION';
15781580
TRANSACTIONS: 'TRANSACTIONS';
15791581
TRANSFORM: 'TRANSFORM';
1582+
TRIM: 'TRIM';
15801583
TRUE: 'TRUE';
15811584
TRUNCATE: 'TRUNCATE';
15821585
TYPE: 'TYPE';

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 21 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1403,29 +1403,28 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
14031403
}
14041404

14051405
/**
1406-
* Create a (windowed)/trim Function expression.
1406+
* Create a Trim expression.
1407+
*/
1408+
override def visitTrim(ctx: TrimContext): Expression = withOrigin(ctx) {
1409+
val srcStr = expression(ctx.srcStr)
1410+
val trimStr = Option(ctx.trimStr).map(expression)
1411+
ctx.trimOption.getType match {
1412+
case SqlBaseParser.BOTH =>
1413+
StringTrim(srcStr, trimStr)
1414+
case SqlBaseParser.LEADING =>
1415+
StringTrimLeft(srcStr, trimStr)
1416+
case SqlBaseParser.TRAILING =>
1417+
StringTrimRight(srcStr, trimStr)
1418+
case other =>
1419+
throw new ParseException("Function trim doesn't support with " +
1420+
s"type $other. Please use BOTH, LEADING or TRAILING as trim type", ctx)
1421+
}
1422+
}
1423+
1424+
/**
1425+
* Create a (windowed) Function expression.
14071426
*/
14081427
override def visitFunctionCall(ctx: FunctionCallContext): Expression = withOrigin(ctx) {
1409-
def replaceFunctions(
1410-
funcID: FunctionIdentifier,
1411-
ctx: FunctionCallContext): FunctionIdentifier = {
1412-
val opt = ctx.trimOption
1413-
if (opt != null) {
1414-
if (ctx.qualifiedName.getText.toLowerCase(Locale.ROOT) != "trim") {
1415-
throw new ParseException(s"The specified function ${ctx.qualifiedName.getText} " +
1416-
s"doesn't support with option ${opt.getText}.", ctx)
1417-
}
1418-
opt.getType match {
1419-
case SqlBaseParser.BOTH => funcID
1420-
case SqlBaseParser.LEADING => funcID.copy(funcName = "ltrim")
1421-
case SqlBaseParser.TRAILING => funcID.copy(funcName = "rtrim")
1422-
case _ => throw new ParseException("Function trim doesn't support with " +
1423-
s"type ${opt.getType}. Please use BOTH, LEADING or TRAILING as trim type", ctx)
1424-
}
1425-
} else {
1426-
funcID
1427-
}
1428-
}
14291428
// Create the function call.
14301429
val name = ctx.qualifiedName.getText
14311430
val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
@@ -1437,9 +1436,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
14371436
case expressions =>
14381437
expressions
14391438
}
1440-
val funcId = replaceFunctions(visitFunctionName(ctx.qualifiedName), ctx)
1441-
val function = UnresolvedFunction(funcId, arguments, isDistinct)
1442-
1439+
val function = UnresolvedFunction(visitFunctionName(ctx.qualifiedName), arguments, isDistinct)
14431440

14441441
// Check if the function is evaluated in a windowed context.
14451442
ctx.windowSpec match {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark.sql.catalyst.parser
1919

2020
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
21-
import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction}
21+
import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction}
2222
import org.apache.spark.sql.catalyst.expressions._
2323
import org.apache.spark.sql.catalyst.plans._
2424
import org.apache.spark.sql.catalyst.plans.logical._
@@ -702,33 +702,40 @@ class PlanParserSuite extends AnalysisTest {
702702
}
703703

704704
test("TRIM function") {
705-
intercept("select ltrim(both 'S' from 'SS abc S'", "missing ')' at '<EOF>'")
706-
intercept("select rtrim(trailing 'S' from 'SS abc S'", "missing ')' at '<EOF>'")
705+
def assertTrimPlans(inputSQL: String, expectedExpression: Expression): Unit = {
706+
comparePlans(
707+
parsePlan(inputSQL),
708+
Project(Seq(UnresolvedAlias(expectedExpression)), OneRowRelation())
709+
)
710+
}
707711

708-
assertEqual(
712+
intercept("select ltrim(both 'S' from 'SS abc S'", "mismatched input 'from' expecting {')'")
713+
intercept("select rtrim(trailing 'S' from 'SS abc S'", "mismatched input 'from' expecting {')'")
714+
715+
assertTrimPlans(
709716
"SELECT TRIM(BOTH '@$%&( )abc' FROM '@ $ % & ()abc ' )",
710-
OneRowRelation().select('TRIM.function("@$%&( )abc", "@ $ % & ()abc "))
717+
StringTrim(Literal("@ $ % & ()abc "), Some(Literal("@$%&( )abc")))
711718
)
712-
assertEqual(
719+
assertTrimPlans(
713720
"SELECT TRIM(LEADING 'c []' FROM '[ ccccbcc ')",
714-
OneRowRelation().select('ltrim.function("c []", "[ ccccbcc "))
721+
StringTrimLeft(Literal("[ ccccbcc "), Some(Literal("c []")))
715722
)
716-
assertEqual(
723+
assertTrimPlans(
717724
"SELECT TRIM(TRAILING 'c&^,.' FROM 'bc...,,,&&&ccc')",
718-
OneRowRelation().select('rtrim.function("c&^,.", "bc...,,,&&&ccc"))
725+
StringTrimRight(Literal("bc...,,,&&&ccc"), Some(Literal("c&^,.")))
719726
)
720727

721-
assertEqual(
728+
assertTrimPlans(
722729
"SELECT TRIM(BOTH FROM ' bunch o blanks ')",
723-
OneRowRelation().select('TRIM.function(" bunch o blanks "))
730+
StringTrim(Literal(" bunch o blanks "), None)
724731
)
725-
assertEqual(
732+
assertTrimPlans(
726733
"SELECT TRIM(LEADING FROM ' bunch o blanks ')",
727-
OneRowRelation().select('ltrim.function(" bunch o blanks "))
734+
StringTrimLeft(Literal(" bunch o blanks "), None)
728735
)
729-
assertEqual(
736+
assertTrimPlans(
730737
"SELECT TRIM(TRAILING FROM ' bunch o blanks ')",
731-
OneRowRelation().select('rtrim.function(" bunch o blanks "))
738+
StringTrimRight(Literal(" bunch o blanks "), None)
732739
)
733740
}
734741

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
246246
"transaction",
247247
"transactions",
248248
"trigger",
249+
"trim",
249250
"true",
250251
"truncate",
251252
"unarchive",

sql/core/src/test/resources/sql-tests/inputs/string-functions.sql

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ SELECT substring('Spark SQL' from -3);
4040
SELECT substring('Spark SQL' from 5 for 1);
4141

4242
-- trim/ltrim/rtrim
43-
SELECT trim('yxTomxx', 'xyz');
44-
SELECT trim('xxxbarxxx', 'x');
45-
SELECT ltrim('zzzytest', 'xyz');
46-
SELECT ltrim('zzzytestxyz', 'xyz');
47-
SELECT ltrim('xyxXxyLAST WORD', 'xy');
48-
SELECT rtrim('testxxzx', 'xyz');
49-
SELECT rtrim('xyztestxxzx', 'xyz');
50-
SELECT rtrim('TURNERyxXxy', 'xy');
43+
SELECT trim('yxTomxx', 'xyz'), trim(BOTH 'xyz' FROM 'yxTomxx');
44+
SELECT trim('xxxbarxxx', 'x'), trim(BOTH 'x' FROM 'xxxbarxxx');
45+
SELECT ltrim('zzzytest', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytest');
46+
SELECT ltrim('zzzytestxyz', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytestxyz');
47+
SELECT ltrim('xyxXxyLAST WORD', 'xy'), trim(LEADING 'xy' FROM 'xyxXxyLAST WORD');
48+
SELECT rtrim('testxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'testxxzx');
49+
SELECT rtrim('xyztestxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'xyztestxxzx');
50+
SELECT rtrim('TURNERyxXxy', 'xy'), trim(TRAILING 'xy' FROM 'TURNERyxXxy');

sql/core/src/test/resources/sql-tests/results/string-functions.sql.out

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -205,64 +205,64 @@ k
205205

206206

207207
-- !query 25
208-
SELECT trim('yxTomxx', 'xyz')
208+
SELECT trim('yxTomxx', 'xyz'), trim(BOTH 'xyz' FROM 'yxTomxx')
209209
-- !query 25 schema
210-
struct<trim(yxTomxx, xyz):string>
210+
struct<trim(yxTomxx, xyz):string,trim(yxTomxx, xyz):string>
211211
-- !query 25 output
212-
Tom
212+
Tom Tom
213213

214214

215215
-- !query 26
216-
SELECT trim('xxxbarxxx', 'x')
216+
SELECT trim('xxxbarxxx', 'x'), trim(BOTH 'x' FROM 'xxxbarxxx')
217217
-- !query 26 schema
218-
struct<trim(xxxbarxxx, x):string>
218+
struct<trim(xxxbarxxx, x):string,trim(xxxbarxxx, x):string>
219219
-- !query 26 output
220-
bar
220+
bar bar
221221

222222

223223
-- !query 27
224-
SELECT ltrim('zzzytest', 'xyz')
224+
SELECT ltrim('zzzytest', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytest')
225225
-- !query 27 schema
226-
struct<ltrim(zzzytest, xyz):string>
226+
struct<ltrim(zzzytest, xyz):string,ltrim(zzzytest, xyz):string>
227227
-- !query 27 output
228-
test
228+
test test
229229

230230

231231
-- !query 28
232-
SELECT ltrim('zzzytestxyz', 'xyz')
232+
SELECT ltrim('zzzytestxyz', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytestxyz')
233233
-- !query 28 schema
234-
struct<ltrim(zzzytestxyz, xyz):string>
234+
struct<ltrim(zzzytestxyz, xyz):string,ltrim(zzzytestxyz, xyz):string>
235235
-- !query 28 output
236-
testxyz
236+
testxyz testxyz
237237

238238

239239
-- !query 29
240-
SELECT ltrim('xyxXxyLAST WORD', 'xy')
240+
SELECT ltrim('xyxXxyLAST WORD', 'xy'), trim(LEADING 'xy' FROM 'xyxXxyLAST WORD')
241241
-- !query 29 schema
242-
struct<ltrim(xyxXxyLAST WORD, xy):string>
242+
struct<ltrim(xyxXxyLAST WORD, xy):string,ltrim(xyxXxyLAST WORD, xy):string>
243243
-- !query 29 output
244-
XxyLAST WORD
244+
XxyLAST WORD XxyLAST WORD
245245

246246

247247
-- !query 30
248-
SELECT rtrim('testxxzx', 'xyz')
248+
SELECT rtrim('testxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'testxxzx')
249249
-- !query 30 schema
250-
struct<rtrim(testxxzx, xyz):string>
250+
struct<rtrim(testxxzx, xyz):string,rtrim(testxxzx, xyz):string>
251251
-- !query 30 output
252-
test
252+
test test
253253

254254

255255
-- !query 31
256-
SELECT rtrim('xyztestxxzx', 'xyz')
256+
SELECT rtrim('xyztestxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'xyztestxxzx')
257257
-- !query 31 schema
258-
struct<rtrim(xyztestxxzx, xyz):string>
258+
struct<rtrim(xyztestxxzx, xyz):string,rtrim(xyztestxxzx, xyz):string>
259259
-- !query 31 output
260-
xyztest
260+
xyztest xyztest
261261

262262

263263
-- !query 32
264-
SELECT rtrim('TURNERyxXxy', 'xy')
264+
SELECT rtrim('TURNERyxXxy', 'xy'), trim(TRAILING 'xy' FROM 'TURNERyxXxy')
265265
-- !query 32 schema
266-
struct<rtrim(TURNERyxXxy, xy):string>
266+
struct<rtrim(TURNERyxXxy, xy):string,rtrim(TURNERyxXxy, xy):string>
267267
-- !query 32 output
268-
TURNERyxX
268+
TURNERyxX TURNERyxX

0 commit comments

Comments
 (0)