-
Notifications
You must be signed in to change notification settings - Fork 29.1k
[SPARK-47672][SQL] Avoid double eval from filter pushDown w/ projection pushdown #46143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 66 commits
816d27d
4bf7342
5c3e437
8f44a4b
3408090
0ab43c0
fa486c9
b2f5d80
699f74d
8f2e942
83b0086
af86854
a597a70
8ec9cec
3dcab7a
338045c
f55337f
936c958
312c457
47dfdfa
10942ff
ccb798a
942b552
bb28e5d
6e8dd38
63c93cf
cbce842
a57acfa
a195c33
c654243
90b61a5
fe9787f
e409def
faa5d03
944bea6
071f2f3
195bc90
5c83ce1
f8e0663
4b57e4d
b1b42b8
be63017
1eb9e42
8a80e98
e0275ff
e0e233a
9c83e99
18c60c5
23f698a
89d32b7
4e0d9dc
8a88926
6ed0ab9
1d522dd
a102db8
646bc8a
073b5af
74f76ae
afbc2ea
f6bd41c
6e2fd3b
fb65283
93a1b61
511068e
07a9556
1a44511
8df91b0
eab7c8c
7cb8b59
79fbfc9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.expressions._ | |
| import org.apache.spark.sql.catalyst.plans._ | ||
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
| import org.apache.spark.sql.catalyst.rules._ | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.unsafe.types.CalendarInterval | ||
|
|
||
|
|
@@ -52,11 +53,14 @@ class FilterPushdownSuite extends PlanTest { | |
| val attrB = $"b".int | ||
| val attrC = $"c".int | ||
| val attrD = $"d".int | ||
| val attrE = $"e".string | ||
|
|
||
| val testRelation = LocalRelation(attrA, attrB, attrC) | ||
|
|
||
| val testRelation1 = LocalRelation(attrD) | ||
|
|
||
| val testStringRelation = LocalRelation(attrA, attrB, attrE) | ||
|
|
||
| val simpleDisjunctivePredicate = | ||
| ("x.a".attr > 3) && ("y.a".attr > 13) || ("x.a".attr > 1) && ("y.a".attr > 11) | ||
| val expectedPredicatePushDownResult = { | ||
|
|
@@ -152,17 +156,137 @@ class FilterPushdownSuite extends PlanTest { | |
// The filter is written against alias "e", so the expected plan has the predicate rewritten
// in terms of the alias definition (a + b) and placed directly on the relation. The second
// projected column "f" is not referenced by the filter and is expected to stay in the Project.
test("can't push without rewrite") {
  val originalQuery =
    testRelation
      .select($"a" + $"b" as "e", $"a" - $"b" as "f")
      .where($"e" === 1)
      .analyze

  // NOTE(review): originalQuery is already analyzed above; the second .analyze is kept as-is.
  val optimized = Optimize.execute(originalQuery.analyze)
  val correctAnswer =
    testRelation
      .where($"a" + $"b" === 1)
      .select($"a" + $"b" as "e", $"a" - $"b" as "f")
      .analyze

  comparePlans(optimized, correctAnswer)
}
|
|
||
// With AVOID_DOUBLE_FILTER_EVAL set to "false", the expected plan filters on the alias
// definition below the Project, so rlike("magic") appears in both the Filter and the Project.
test("SPARK-47672: Do double evaluation when configured") {
  withSQLConf(SQLConf.AVOID_DOUBLE_FILTER_EVAL.key -> "false") {
    // Projection used by both the input and the expected plan. A def, so each call builds
    // fresh expressions just like two separate inline select(...) calls.
    def project(child: LogicalPlan): LogicalPlan =
      child.select($"a", $"e".rlike("magic") as "f", $"e".rlike("notmagic") as "j", $"b")

    val input = project(testStringRelation).where($"a" > 5 && $"f").analyze
    val actual = Optimize.execute(input)
    val expected =
      project(testStringRelation.where($"a" > 5 && $"e".rlike("magic"))).analyze

    comparePlans(actual, expected)
  }
}
|
|
||
// The only projected column is the rlike alias "f" and the filter is exactly that alias.
// The expected plan is identical to the input plan: the Filter stays above the Project.
test("SPARK-47672: Make sure that we handle the case where everything is expensive") {
  // A def, so the input and the expected plan are built and analyzed independently.
  def filterOverProject: LogicalPlan =
    testStringRelation
      .select($"e".rlike("magic") as "f")
      .where($"f")
      .analyze

  val actual = Optimize.execute(filterOverProject)
  val expected = filterOverProject

  comparePlans(actual, expected)
}
|
|
||
// The predicate reads only the base column "a" and never the rlike alias "f". The expected
// plan has the Filter directly on the relation with the projection list unchanged above it.
test("SPARK-47672: Ensure filter pushdown without alias reference does not move a projection.") {
  // Projection list shared by the input and the expected plan (fresh expressions per call).
  def project(child: LogicalPlan): LogicalPlan =
    child.select($"a", $"e".rlike("magic") as "f", $"b" + $"a")

  val input = project(testStringRelation).where($"a" > 5).analyze
  val actual = Optimize.execute(input)
  val expected = project(testStringRelation.where($"a" > 5)).analyze

  comparePlans(actual, expected)
}
|
|
||
|
|
||
// The filter is on "c", a plain rename of column "a". The expected plan has the predicate
// rewritten to a > 5 on the relation, with the original projection list kept on top.
test("SPARK-47672: Inexpensive filter pushdown should not move projections") {
  // Projection list shared by the input and the expected plan (fresh expressions per call).
  def project(child: LogicalPlan): LogicalPlan =
    child.select($"a" as "c", $"b" + $"a")

  val input = project(testStringRelation).where($"c" > 5).analyze
  val actual = Optimize.execute(input)
  val expected = project(testStringRelation.where($"a" > 5)).analyze

  comparePlans(actual, expected)
}
|
|
||
// The predicate is a disjunction of a base-column comparison and the rlike alias "f".
// The expected plan is identical to the input plan: nothing is pushed below the Project.
// Per the PR review discussion, this is essentially a more involved variant of the simpler
// expensive-alias tests; it is kept to make sure the predicate split is handled correctly.
test("SPARK-47672: Avoid double evaluation with projections can't push past certain items") {
  // A def, so the input and the expected plan are built and analyzed independently.
  def filterOverProject: LogicalPlan =
    testStringRelation
      .select($"a", $"e".rlike("magic") as "f")
      .where($"a" > 5 || $"f")
      .analyze

  val actual = Optimize.execute(filterOverProject)
  val expected = filterOverProject

  comparePlans(actual, expected)
}
|
|
||
// Case 1: Multiple filters that reference only plain column aliases (c -> a, d -> b) and
// never the rlike alias "f" - all should be pushed.
test("SPARK-47672: Case 1 - multiple filters not referencing projection aliases") {
  // Projection list shared by the input and the expected plan (fresh expressions per call).
  def project(child: LogicalPlan): LogicalPlan =
    child.select($"a" as "c", $"e".rlike("magic") as "f", $"b" as "d")

  val input = project(testStringRelation).where($"c" > 5 && $"d" < 10).analyze
  val actual = Optimize.execute(input)

  // Both conjuncts are expected below the Project, rewritten onto the underlying
  // columns, because c and d are simple (inexpensive) aliases.
  val expected = project(testStringRelation.where($"a" > 5 && $"b" < 10)).analyze

  comparePlans(actual, expected)
}
|
|
||
// Case 2: Multiple filters with inexpensive alias references - all should be pushed.
test("SPARK-47672: Case 2 - multiple filters with inexpensive alias references") {
  // Projection list shared by the input and the expected plan (fresh expressions per call).
  def project(child: LogicalPlan): LogicalPlan =
    child.select($"a" + $"b" as "sum", $"a" - $"b" as "diff", $"e".rlike("magic") as "f")

  val input = project(testStringRelation).where($"sum" > 10 && $"diff" < 5).analyze
  val actual = Optimize.execute(input)

  // sum and diff are plain arithmetic (inexpensive), so both conjuncts are expected
  // below the Project with the aliases replaced by their definitions.
  val expected =
    project(testStringRelation.where($"a" + $"b" > 10 && $"a" - $"b" < 5)).analyze

  comparePlans(actual, expected)
}
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.