Skip to content

Commit c5a0d11

Browse files
aokolnychyi authored and hvanhovell committed
[SPARK-24575][SQL] Prohibit window expressions inside WHERE and HAVING clauses
## What changes were proposed in this pull request?

As discussed before (see the comment on apache#19193), this PR prohibits window expressions inside WHERE and HAVING clauses.

## How was this patch tested?

This PR comes with a dedicated unit test.

Author: aokolnychyi <[email protected]>

Closes apache#21580 from aokolnychyi/spark-24575.
1 parent c8ef923 commit c5a0d11

File tree

2 files changed

+41
-4
lines changed

2 files changed

+41
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1923,6 +1923,9 @@ class Analyzer(
19231923
// "Aggregate with Having clause" will be triggered.
19241924
def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
19251925

1926+
case Filter(condition, _) if hasWindowFunction(condition) =>
1927+
failAnalysis("It is not allowed to use window functions inside WHERE and HAVING clauses")
1928+
19261929
// Aggregate with Having clause. This rule works with an unresolved Aggregate because
19271930
// a resolved Aggregate will not have Window Functions.
19281931
case f @ Filter(condition, a @ Aggregate(groupingExprs, aggregateExprs, child))

sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,14 @@
1717

1818
package org.apache.spark.sql
1919

20-
import java.sql.{Date, Timestamp}
21-
22-
import scala.collection.mutable
20+
import org.scalatest.Matchers.the
2321

2422
import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled}
2523
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction, Window}
2624
import org.apache.spark.sql.functions._
2725
import org.apache.spark.sql.internal.SQLConf
2826
import org.apache.spark.sql.test.SharedSQLContext
2927
import org.apache.spark.sql.types._
30-
import org.apache.spark.unsafe.types.CalendarInterval
3128

3229
/**
3330
* Window function testing for DataFrame API.
@@ -624,4 +621,41 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext {
624621
}
625622
}
626623
}
624+
625+
// Regression test for SPARK-24575: each query below places a window function in a
// WHERE/HAVING position and is expected to be rejected with an AnalysisException.
test("SPARK-24575: Window functions inside WHERE and HAVING clauses") {
626+
// Asserts that evaluating `df` and reading its analyzed plan throws an AnalysisException
// whose message contains the fragment checked below. `df` is by-name, so the DataFrame
// expression itself is evaluated inside the `thrownBy` block.
def checkAnalysisError(df: => DataFrame): Unit = {
627+
val thrownException = the [AnalysisException] thrownBy {
628+
// Reading the analyzed plan is what is expected to surface the error.
df.queryExecution.analyzed
629+
}
630+
assert(thrownException.message.contains("window functions inside WHERE and HAVING clauses"))
631+
}
632+
// DataFrame API cases: a window function alone in `where`, combined with an ordinary
// predicate, and in a `where` applied on top of `groupBy(...).agg(...)`.
633+
checkAnalysisError(testData2.select('a).where(rank().over(Window.orderBy('b)) === 1))
634+
checkAnalysisError(testData2.where('b === 2 && rank().over(Window.orderBy('b)) === 1))
635+
checkAnalysisError(
636+
testData2.groupBy('a)
637+
.agg(avg('b).as("avgb"))
638+
.where('a > 'avgb && rank().over(Window.orderBy('a)) === 1))
639+
checkAnalysisError(
640+
testData2.groupBy('a)
641+
.agg(max('b).as("maxb"), sum('b).as("sumb"))
642+
.where(rank().over(Window.orderBy('a)) === 1))
643+
checkAnalysisError(
644+
testData2.groupBy('a)
645+
.agg(max('b).as("maxb"), sum('b).as("sumb"))
646+
.where('sumb === 5 && rank().over(Window.orderBy('a)) === 1))
647+
648+
// SQL cases: the same shapes written with WHERE and GROUP BY ... HAVING.
649+
checkAnalysisError(sql("SELECT a FROM testData2 WHERE RANK() OVER(ORDER BY b) = 1"))
650+
checkAnalysisError(sql("SELECT * FROM testData2 WHERE b = 2 AND RANK() OVER(ORDER BY b) = 1"))
651+
checkAnalysisError(
652+
sql("SELECT * FROM testData2 GROUP BY a HAVING a > AVG(b) AND RANK() OVER(ORDER BY a) = 1"))
653+
checkAnalysisError(
654+
sql("SELECT a, MAX(b), SUM(b) FROM testData2 GROUP BY a HAVING RANK() OVER(ORDER BY a) = 1"))
655+
checkAnalysisError(
656+
sql(
657+
s"""SELECT a, MAX(b)
658+
|FROM testData2
659+
|GROUP BY a
660+
|HAVING SUM(b) = 5 AND RANK() OVER(ORDER BY a) = 1""".stripMargin))
661+
}
627661
}

0 commit comments

Comments
 (0)