Skip to content
This repository was archived by the owner on Jan 9, 2020. It is now read-only.

Commit 29a0be2

Browse files
committed
[SPARK-21129][SQL] Arguments of SQL function call should not be named expressions
### What changes were proposed in this pull request? Function argument should not be named expressions. It could cause two issues: - Misleading error message - Unexpected query results when the column name is `distinct`, which is not a reserved word in our parser. ``` spark-sql> select count(distinct c1, distinct c2) from t1; Error in query: cannot resolve '`distinct`' given input columns: [c1, c2]; line 1 pos 26; 'Project [unresolvedalias('count(c1#30, 'distinct), None)] +- SubqueryAlias t1 +- CatalogRelation `default`.`t1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#30, c2#31] ``` After the fix, the error message becomes ``` spark-sql> select count(distinct c1, distinct c2) from t1; Error in query: extraneous input 'c2' expecting {')', ',', '.', '[', 'OR', 'AND', 'IN', NOT, 'BETWEEN', 'LIKE', RLIKE, 'IS', EQ, '<=>', '<>', '!=', '<', LTE, '>', GTE, '+', '-', '*', '/', '%', 'DIV', '&', '|', '||', '^'}(line 1, pos 35) == SQL == select count(distinct c1, distinct c2) from t1 -----------------------------------^^^ ``` ### How was this patch tested? Added a test case to parser suite. Author: Xiao Li <[email protected]> Author: gatorsmile <[email protected]> Closes apache#18338 from gatorsmile/parserDistinctAggFunc. (cherry picked from commit eed9c4e) Signed-off-by: gatorsmile <[email protected]>
1 parent 8b08fd0 commit 29a0be2

File tree

7 files changed

+59
-5
lines changed

7 files changed

+59
-5
lines changed

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -552,14 +552,15 @@ primaryExpression
552552
| CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
553553
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
554554
| CAST '(' expression AS dataType ')' #cast
555+
| STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct
555556
| FIRST '(' expression (IGNORE NULLS)? ')' #first
556557
| LAST '(' expression (IGNORE NULLS)? ')' #last
557558
| constant #constantDefault
558559
| ASTERISK #star
559560
| qualifiedName '.' ASTERISK #star
560561
| '(' namedExpression (',' namedExpression)+ ')' #rowConstructor
561562
| '(' query ')' #subqueryExpression
562-
| qualifiedName '(' (setQuantifier? namedExpression (',' namedExpression)*)? ')'
563+
| qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
563564
(OVER windowSpec)? #functionCall
564565
| value=primaryExpression '[' index=valueExpression ']' #subscript
565566
| identifier #columnReference

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ package object dsl {
168168
case Seq() => UnresolvedStar(None)
169169
case target => UnresolvedStar(Option(target))
170170
}
171+
def namedStruct(e: Expression*): Expression = CreateNamedStruct(e)
171172

172173
def callFunction[T, U](
173174
func: T => U,

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,13 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
10331033
Cast(expression(ctx.expression), visitSparkDataType(ctx.dataType))
10341034
}
10351035

1036+
/**
1037+
* Create a [[CreateStruct]] expression.
1038+
*/
1039+
override def visitStruct(ctx: StructContext): Expression = withOrigin(ctx) {
1040+
CreateStruct(ctx.argument.asScala.map(expression))
1041+
}
1042+
10361043
/**
10371044
* Create a [[First]] expression.
10381045
*/
@@ -1056,7 +1063,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
10561063
// Create the function call.
10571064
val name = ctx.qualifiedName.getText
10581065
val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
1059-
val arguments = ctx.namedExpression().asScala.map(expression) match {
1066+
val arguments = ctx.argument.asScala.map(expression) match {
10601067
case Seq(UnresolvedStar(None))
10611068
if name.toLowerCase(Locale.ROOT) == "count" && !isDistinct =>
10621069
// Transform COUNT(*) into COUNT(1).

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ class ExpressionParserSuite extends PlanTest {
226226
assertEqual("foo(distinct a, b)", 'foo.distinctFunction('a, 'b))
227227
assertEqual("grouping(distinct a, b)", 'grouping.distinctFunction('a, 'b))
228228
assertEqual("`select`(all a, b)", 'select.function('a, 'b))
229-
assertEqual("foo(a as x, b as e)", 'foo.function('a as 'x, 'b as 'e))
229+
intercept("foo(a x)", "extraneous input 'x'")
230230
}
231231

232232
test("window function expressions") {
@@ -325,7 +325,9 @@ class ExpressionParserSuite extends PlanTest {
325325
assertEqual("a.b", UnresolvedAttribute("a.b"))
326326
assertEqual("`select`.b", UnresolvedAttribute("select.b"))
327327
assertEqual("(a + b).b", ('a + 'b).getField("b")) // This will fail analysis.
328-
assertEqual("struct(a, b).b", 'struct.function('a, 'b).getField("b"))
328+
assertEqual(
329+
"struct(a, b).b",
330+
namedStruct(NamePlaceholder, 'a, NamePlaceholder, 'b).getField("b"))
329331
}
330332

331333
test("reference") {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,12 @@ class PlanParserSuite extends PlanTest {
223223
assertEqual(s"$sql grouping sets((a, b), (a), ())",
224224
GroupingSets(Seq(Seq('a, 'b), Seq('a), Seq()), Seq('a, 'b), table("d"),
225225
Seq('a, 'b, 'sum.function('c).as("c"))))
226+
227+
val m = intercept[ParseException] {
228+
parsePlan("SELECT a, b, count(distinct a, distinct b) as c FROM d GROUP BY a, b")
229+
}.getMessage
230+
assert(m.contains("extraneous input 'b'"))
231+
226232
}
227233

228234
test("limit") {

sql/core/src/test/resources/sql-tests/inputs/struct.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,10 @@ SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM tbl_x;
1818

1919
-- Prepend a column to a struct
2020
SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x;
21+
22+
-- Select a column from a struct
23+
SELECT ID, STRUCT(ST.*).C NST FROM tbl_x;
24+
SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x;
25+
26+
-- Select an alias from a struct
27+
SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x;

sql/core/src/test/resources/sql-tests/results/struct.sql.out

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 6
2+
-- Number of queries: 9
33

44

55
-- !query 0
@@ -58,3 +58,33 @@ struct<ID:int,NST:struct<AA:string,C:string,D:string>>
5858
1 {"AA":"1","C":"gamma","D":"delta"}
5959
2 {"AA":"2","C":"epsilon","D":"eta"}
6060
3 {"AA":"3","C":"theta","D":"iota"}
61+
62+
63+
-- !query 6
64+
SELECT ID, STRUCT(ST.*).C NST FROM tbl_x
65+
-- !query 6 schema
66+
struct<ID:int,NST:string>
67+
-- !query 6 output
68+
1 gamma
69+
2 epsilon
70+
3 theta
71+
72+
73+
-- !query 7
74+
SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x
75+
-- !query 7 schema
76+
struct<ID:int,NST:string>
77+
-- !query 7 output
78+
1 delta
79+
2 eta
80+
3 iota
81+
82+
83+
-- !query 8
84+
SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x
85+
-- !query 8 schema
86+
struct<ID:int,named_struct(STC, ST.C AS `C` AS `STC`, STD, ST.D AS `D` AS `STD`).STD:string>
87+
-- !query 8 output
88+
1 delta
89+
2 eta
90+
3 iota

0 commit comments

Comments
 (0)