Skip to content

Commit 0afe2a9

Browse files
authored
Merge pull request #3231 from dolthub/angela/joinfilterdrop
Push filters down into join condition
2 parents e40c989 + 0a3f608 commit 0afe2a9

File tree

11 files changed

+2955
-2922
lines changed

11 files changed

+2955
-2922
lines changed

enginetest/enginetests.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ func TestSpatialQueriesPrepared(t *testing.T, harness Harness) {
148148

149149
// TestJoinQueries tests join queries against a provided harness.
150150
func TestJoinQueries(t *testing.T, harness Harness) {
151-
harness.Setup(setup.MydbData, setup.MytableData, setup.Pk_tablesData, setup.OthertableData, setup.NiltableData, setup.XyData, setup.FooData)
151+
harness.Setup(setup.MydbData, setup.MytableData, setup.Pk_tablesData, setup.OthertableData, setup.NiltableData, setup.XyData, setup.FooData, setup.Comp_index_tablesData)
152152
e, err := harness.NewEngine(t)
153153
require.NoError(t, err)
154154

@@ -263,7 +263,7 @@ func TestQueriesPrepared(t *testing.T, harness Harness) {
263263

264264
// TestJoinQueriesPrepared tests join queries as prepared statements against a provided harness.
265265
func TestJoinQueriesPrepared(t *testing.T, harness Harness) {
266-
harness.Setup(setup.MydbData, setup.MytableData, setup.Pk_tablesData, setup.OthertableData, setup.NiltableData, setup.XyData, setup.FooData)
266+
harness.Setup(setup.MydbData, setup.MytableData, setup.Pk_tablesData, setup.OthertableData, setup.NiltableData, setup.XyData, setup.FooData, setup.Comp_index_tablesData)
267267
for _, tt := range queries.JoinQueryTests {
268268
if tt.SkipPrepared {
269269
continue

enginetest/join_planning_tests.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,9 +425,9 @@ var JoinPlanningTests = []joinPlanScript{
425425
exp: []sql.Row{{0, 2}, {2, 1}, {3, 3}},
426426
},
427427
{
428-
// anti join will be cross-join-right, be passed non-nil parent row
428+
// anti join will be cross-join-right, then converted to inner join when filters are pushed down, be passed non-nil parent row
429429
q: "select x,a from ab, (select * from xy where x != (select r from rs where r = 1) order by 1) sq where x = 2 and b = 2 order by 1,2;",
430-
types: []plan.JoinType{plan.JoinTypeCrossHash, plan.JoinTypeLeftOuter},
430+
types: []plan.JoinType{plan.JoinTypeInner, plan.JoinTypeLeftOuter},
431431
exp: []sql.Row{{2, 0}, {2, 1}, {2, 2}},
432432
},
433433
{

enginetest/queries/integration_plans.go

Lines changed: 1796 additions & 1862 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

enginetest/queries/join_queries.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,18 @@ on w = 0;`,
789789
Query: "select * from foo.othertable join mydb.othertable on foo.othertable.text = 'a'",
790790
Expected: []sql.Row{{"a", 4, "third", 1}, {"a", 4, "second", 2}, {"a", 4, "first", 3}},
791791
},
792+
{
793+
// Regression test ensuring that filters are not dropped after join optimization
794+
// https://github.com/dolthub/dolt/issues/9868
795+
Query: "select * from comp_index_t0 c join comp_index_t0 b join comp_index_t0 a on a.v2 = b.pk and b.v2 = c.pk and c.v2 = 1",
796+
Expected: []sql.Row{},
797+
},
798+
{
799+
// Regression test ensuring that filters are not dropped after join optimization
800+
// https://github.com/dolthub/dolt/issues/9868
801+
Query: "select * from comp_index_t0 a join comp_index_t0 b join comp_index_t0 c on a.v2 = b.pk and b.v2 = c.pk and c.v2 = 5",
802+
Expected: []sql.Row{},
803+
},
792804
}
793805

794806
var JoinScriptTests = []ScriptTest{

enginetest/queries/query_plans.go

Lines changed: 581 additions & 455 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

enginetest/queries/tpch_plans.go

Lines changed: 514 additions & 581 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

enginetest/scriptgen/setup/helper.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,5 +115,6 @@ var (
115115
XyData,
116116
Invert_pkData,
117117
JoinData,
118+
Comp_index_tablesData,
118119
}
119120
)

enginetest/spatial_index_tests.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -391,10 +391,14 @@ func evalSpatialIndexPlanTest(t *testing.T, harness Harness, e QueryEngine, quer
391391
if n == nil {
392392
return false
393393
}
394-
if _, ok := n.(*plan.Filter); ok {
394+
switch n := n.(type) {
395+
case *plan.Filter:
395396
hasFilter = true
396-
}
397-
if _, ok := n.(*plan.IndexedTableAccess); ok {
397+
case *plan.JoinNode:
398+
if n.Filter != nil {
399+
hasFilter = true
400+
}
401+
case *plan.IndexedTableAccess:
398402
hasRightOrder = hasFilter
399403
hasIndex = true
400404
}

sql/analyzer/pushdown.go

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ func pushFilters(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, s
8787

8888
// move filter predicates directly above their respective tables in joins
8989
ret, same, err := pushdownAboveTables(n, filters)
90-
if same || err != nil {
90+
if err != nil {
9191
return n, transform.SameTree, err
9292
}
9393

@@ -96,7 +96,7 @@ func pushFilters(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, s
9696
return n, transform.SameTree, fmt.Errorf("pushdown mistakenly converted filter to non-filter: %T", ret)
9797
}
9898
// remove handled
99-
newF := removePushedDownPredicates(ctx, a, retF, filters)
99+
newF := updateFilterNode(ctx, a, retF, filters)
100100
if newF != nil {
101101
same = transform.NewTree
102102
ret = newF
@@ -202,7 +202,7 @@ func transformPushdownSubqueryAliasFilters(ctx *sql.Context, a *Analyzer, n sql.
202202
return transform.NodeWithCtx(n, filterPushdownChildSelector, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) {
203203
switch node := c.Node.(type) {
204204
case *plan.Filter:
205-
newF := removePushedDownPredicates(ctx, a, node, filters)
205+
newF := updateFilterNode(ctx, a, node, filters)
206206
if newF == nil {
207207
return node, transform.SameTree, nil
208208
}
@@ -311,22 +311,38 @@ func pushdownFiltersUnderSubqueryAlias(ctx *sql.Context, a *Analyzer, sa *plan.S
311311
return n, transform.NewTree, nil
312312
}
313313

314-
// removePushedDownPredicates removes all handled filter predicates from the filter given and returns. If all
315-
// predicates have been handled, it replaces the filter with its child.
316-
func removePushedDownPredicates(ctx *sql.Context, a *Analyzer, node *plan.Filter, filters *filterSet) sql.Node {
317-
if filters.handledCount() == 0 {
318-
a.Log("no handled filters, leaving filter untouched")
319-
return nil
320-
}
321-
322-
// figure out if the filter's filters were all handled
314+
// updateFilterNode updates the filter node based on the filter predicates handled. Any handled filter predicates are
315+
// removed from the filter node. If all filter predicates have been handled and there are no unhandled predicates, the
316+
// filter node is removed. If there are remaining filter predicates and the immediate child of the filter is a non-outer
317+
// join, the remaining unhandled filters are pushed into the join node and added to the join filters, and the filter
318+
// node is removed.
319+
func updateFilterNode(ctx *sql.Context, a *Analyzer, node *plan.Filter, filters *filterSet) sql.Node {
323320
filterExpressions := expression.SplitConjunction(node.Expression)
324321
unhandled := subtractExprSet(filterExpressions, filters.handledFilters)
322+
325323
if len(unhandled) == 0 {
326324
a.Log("filter node has no unhandled filters, so it will be removed")
327325
return node.Child
328326
}
329327

328+
// push filters into joinChild
329+
if joinChild, ok := node.Child.(*plan.JoinNode); ok && !joinChild.Op.IsOuter() {
330+
a.Log("pushing filters into join node")
331+
if joinChild.Op.IsCross() {
332+
return plan.NewInnerJoin(joinChild.Left(), joinChild.Right(), expression.JoinAnd(unhandled...))
333+
}
334+
if joinChild.Filter != nil {
335+
unhandled = append(unhandled, joinChild.Filter)
336+
}
337+
joinChild.Filter = expression.JoinAnd(unhandled...)
338+
return joinChild
339+
}
340+
341+
if filters.handledCount() == 0 {
342+
a.Log("no handled filters, leaving filter untouched")
343+
return nil
344+
}
345+
330346
if len(unhandled) == len(filterExpressions) {
331347
a.Log("no filters removed from filter node")
332348
return nil

sql/memo/join_order_builder.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -395,15 +395,15 @@ func (j *joinOrderBuilder) hasEqEdge(leftCol, rightCol sql.ColumnId) bool {
395395
return false
396396
}
397397

398-
func (j *joinOrderBuilder) findVertexFromCol(col sql.ColumnId) (vertexIndex, GroupId) {
398+
func (j *joinOrderBuilder) findVertexFromCol(col sql.ColumnId) (vertexIndex, GroupId, bool) {
399399
for i, v := range j.vertices {
400400
if t, ok := v.(SourceRel); ok {
401401
if t.Group().RelProps.FuncDeps().All().Contains(col) {
402-
return vertexIndex(i), t.Group().Id
402+
return vertexIndex(i), t.Group().Id, true
403403
}
404404
}
405405
}
406-
panic("vertex not found")
406+
return 0, 0, false
407407
}
408408

409409
func (j *joinOrderBuilder) findVertexFromGroup(grp GroupId) vertexIndex {
@@ -421,8 +421,11 @@ func (j *joinOrderBuilder) findVertexFromGroup(grp GroupId) vertexIndex {
421421
// on an equality filter between two columns.
422422
func (j *joinOrderBuilder) makeTransitiveEdge(col1, col2 sql.ColumnId) {
423423
var vert vertexSet
424-
v1, _ := j.findVertexFromCol(col1)
425-
v2, _ := j.findVertexFromCol(col2)
424+
v1, _, v1found := j.findVertexFromCol(col1)
425+
v2, _, v2found := j.findVertexFromCol(col2)
426+
if !v1found || !v2found {
427+
return
428+
}
426429
vert = vert.add(v1).add(v2)
427430

428431
// find edge where the vertices are provided but partitioned

0 commit comments

Comments
 (0)