Skip to content

Commit 0e22ceb

Browse files
committed
[SPARK-XXXX][SQL] Address review comments
- Register CrossJoinArrayContainsToInnerJoin in Optimizer.scala batches - Update benchmark to exclude rule for accurate unoptimized baseline - Remove Presto references from code comments
1 parent 146d170 commit 0e22ceb

File tree

3 files changed

+7
-5
lines changed

3 files changed

+7
-5
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CrossJoinArrayContainsToInnerJoin.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,10 @@ import org.apache.spark.sql.types._
4141
* }}}
4242
*
4343
* This avoids the O(N*M) cross join by using unnesting and equi-join.
44-
*
45-
* Ported from Presto's CrossJoinWithArrayContainsToInnerJoin optimizer rule.
4644
*/
4745
object CrossJoinArrayContainsToInnerJoin extends Rule[LogicalPlan] with PredicateHelper {
4846

49-
// Supported element types for the optimization (matching Presto's supported types)
47+
// Supported element types for the optimization
5048
private val supportedTypes: Set[DataType] = Set(
5149
IntegerType, LongType, StringType, DateType
5250
)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ abstract class Optimizer(catalogManager: CatalogManager)
261261
Batch("Optimize One Row Plan", fixedPoint, OptimizeOneRowPlan),
262262
// The following batch should be executed after batch "Join Reorder" and "LocalRelation".
263263
Batch("Check Cartesian Products", Once,
264+
CrossJoinArrayContainsToInnerJoin,
264265
CheckCartesianProducts),
265266
Batch("RewriteSubquery", Once,
266267
RewritePredicateSubquery,

sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/CrossJoinArrayContainsToInnerJoinBenchmark.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,11 @@ object CrossJoinArrayContainsToInnerJoinBenchmark extends SqlBasedBenchmark {
7373
items.createOrReplaceTempView("items")
7474

7575
benchmark.addCase("Cross join + array_contains filter (unoptimized)", numIters = 3) { _ =>
76-
// Disable the optimization to simulate unoptimized behavior
77-
withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
76+
// Disable the optimization to measure the true cross-join+filter baseline
77+
withSQLConf(
78+
SQLConf.CROSS_JOINS_ENABLED.key -> "true",
79+
SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
80+
"org.apache.spark.sql.catalyst.optimizer.CrossJoinArrayContainsToInnerJoin") {
7881
// This query would be a cross join with filter without optimization
7982
val df = spark.sql(
8083
"""

0 commit comments

Comments
 (0)