Skip to content
This repository was archived by the owner on Jan 9, 2020. It is now read-only.

Commit fab070c

Browse files
gatorsmile authored and cloud-fan committed
[SPARK-21132][SQL] DISTINCT modifier of function arguments should not be silently ignored
### What changes were proposed in this pull request?

We should not silently ignore `DISTINCT` when it is not supported in the function arguments. This PR is to block these cases and issue the error messages.

### How was this patch tested?

Added test cases for both regular functions and window functions.

Author: Xiao Li <[email protected]>

Closes apache#18340 from gatorsmile/firstCount.

(cherry picked from commit 9413b84)
Signed-off-by: Wenchen Fan <[email protected]>
1 parent d3c79b7 commit fab070c

File tree

3 files changed

+31
-6
lines changed

3 files changed

+31
-6
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,11 +1189,21 @@ class Analyzer(
11891189
// AggregateWindowFunctions are AggregateFunctions that can only be evaluated within
11901190
// the context of a Window clause. They do not need to be wrapped in an
11911191
// AggregateExpression.
1192-
case wf: AggregateWindowFunction => wf
1192+
case wf: AggregateWindowFunction =>
1193+
if (isDistinct) {
1194+
failAnalysis(s"${wf.prettyName} does not support the modifier DISTINCT")
1195+
} else {
1196+
wf
1197+
}
11931198
// We get an aggregate function, we need to wrap it in an AggregateExpression.
11941199
case agg: AggregateFunction => AggregateExpression(agg, Complete, isDistinct)
11951200
// This function is not an aggregate function, just return the resolved one.
1196-
case other => other
1201+
case other =>
1202+
if (isDistinct) {
1203+
failAnalysis(s"${other.prettyName} does not support the modifier DISTINCT")
1204+
} else {
1205+
other
1206+
}
11971207
}
11981208
}
11991209
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
2424
import org.apache.spark.sql.catalyst.dsl.plans._
2525
import org.apache.spark.sql.catalyst.expressions._
2626
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Max}
27-
import org.apache.spark.sql.catalyst.plans.{Cross, Inner, LeftOuter, RightOuter}
27+
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
28+
import org.apache.spark.sql.catalyst.plans.{Cross, LeftOuter, RightOuter}
2829
import org.apache.spark.sql.catalyst.plans.logical._
2930
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
3031
import org.apache.spark.sql.types._
@@ -152,7 +153,7 @@ class AnalysisErrorSuite extends AnalysisTest {
152153
"not supported within a window function" :: Nil)
153154

154155
errorTest(
155-
"distinct window function",
156+
"distinct aggregate function in window",
156157
testRelation2.select(
157158
WindowExpression(
158159
AggregateExpression(Count(UnresolvedAttribute("b")), Complete, isDistinct = true),
@@ -162,6 +163,16 @@ class AnalysisErrorSuite extends AnalysisTest {
162163
UnspecifiedFrame)).as('window)),
163164
"Distinct window functions are not supported" :: Nil)
164165

166+
errorTest(
167+
"distinct function",
168+
CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"),
169+
"hex does not support the modifier DISTINCT" :: Nil)
170+
171+
errorTest(
172+
"distinct window function",
173+
CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) over () FROM TaBlE"),
174+
"percent_rank does not support the modifier DISTINCT" :: Nil)
175+
165176
errorTest(
166177
"nested aggregate functions",
167178
testRelation.groupBy('a)(

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717

1818
package org.apache.spark.sql.catalyst.analysis
1919

20+
import java.net.URI
2021
import java.util.Locale
2122

2223
import org.apache.spark.sql.AnalysisException
23-
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
24+
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
2425
import org.apache.spark.sql.catalyst.plans.PlanTest
2526
import org.apache.spark.sql.catalyst.plans.logical._
2627
import org.apache.spark.sql.internal.SQLConf
@@ -32,7 +33,10 @@ trait AnalysisTest extends PlanTest {
3233

3334
private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
3435
val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)
35-
val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
36+
val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf)
37+
catalog.createDatabase(
38+
CatalogDatabase("default", "", new URI("loc"), Map.empty),
39+
ignoreIfExists = false)
3640
catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
3741
catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
3842
new Analyzer(catalog, conf) {

0 commit comments

Comments (0)