Skip to content

Commit 125c97f

Browse files
committed
opt: fix incorrect filter elimination in join reordering
A bug in join reordering which incorrectly eliminated filters has been fixed. The join reordering logic could sometimes eliminate filters of the form `a=a` when constructing new inner-joins. It incorrectly assumed these filters to be redundant because a column is always equivalent to itself. However, `a=a` is a null-rejecting filter, so it can only be omitted if `a` is guaranteed to be non-null by something else, like another filter or a `NOT NULL` constraint. There is no release note because, as far as we know, this bug is not possible to hit in production. The only reproduction we have requires optimization rules to be disabled, which can only happen in tests. Fixes #146507 Release note: None
1 parent 8fcfd5a commit 125c97f

File tree

3 files changed

+77
-10
lines changed

3 files changed

+77
-10
lines changed

pkg/sql/opt/props/equiv_set.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,20 @@ func (eq *EquivGroups) ContainsCol(col opt.ColumnID) bool {
8989
return false
9090
}
9191

92-
// AreColsEquiv indicates whether the given columns are equivalent.
93-
func (eq *EquivGroups) AreColsEquiv(left, right opt.ColumnID) bool {
92+
// AreColsEquiv indicates whether the given columns are equivalent. If "a" and
93+
// "b" are the same column, true is always returned, even for an empty
94+
// EquivGroups.
95+
func (eq *EquivGroups) AreColsEquiv(a, b opt.ColumnID) bool {
9496
if buildutil.CrdbTestBuild {
9597
defer eq.verify()
9698
}
97-
if left == right {
99+
if a == b {
98100
return true
99101
}
100102
for i := range eq.groups {
101-
containsLeft, containsRight := eq.groups[i].Contains(left), eq.groups[i].Contains(right)
102-
if containsLeft || containsRight {
103-
return containsLeft && containsRight
103+
containsA, containsB := eq.groups[i].Contains(a), eq.groups[i].Contains(b)
104+
if containsA || containsB {
105+
return containsA && containsB
104106
}
105107
}
106108
return false

pkg/sql/opt/xform/join_order_builder.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,9 @@ func (jb *JoinOrderBuilder) addJoins(s1, s2 vertexSet) {
620620
jb.equivs.AddFromFDs(&jb.plans[s1].Relational().FuncDeps)
621621
jb.equivs.AddFromFDs(&jb.plans[s2].Relational().FuncDeps)
622622

623+
notNullCols := jb.plans[s1].Relational().NotNullCols.Copy()
624+
notNullCols.UnionWith(jb.plans[s2].Relational().NotNullCols)
625+
623626
// Gather all inner edges that connect the left and right relation sets.
624627
var innerJoinFilters memo.FiltersExpr
625628
for i, ok := jb.innerEdges.Next(0); ok; i, ok = jb.innerEdges.Next(i + 1) {
@@ -631,7 +634,11 @@ func (jb *JoinOrderBuilder) addJoins(s1, s2 vertexSet) {
631634
// Record this edge as applied even if it's redundant, since redundant
632635
// edges are trivially applied.
633636
appliedEdges.Add(i)
634-
if areFiltersRedundant(&jb.equivs, e.filters) {
637+
redundant := areFiltersRedundant(&jb.equivs, e.filters, notNullCols)
638+
// Update notNullCols based on null-rejecting filters to aid in
639+
// finding subsequent redundant filters.
640+
notNullCols.UnionWith(memo.NullColsRejectedByFilter(jb.ctx, jb.evalCtx, e.filters))
641+
if redundant {
635642
// Avoid adding redundant filters.
636643
continue
637644
}
@@ -887,7 +894,11 @@ func (jb *JoinOrderBuilder) addJoin(
887894

888895
// areFiltersRedundant returns true if the given FiltersExpr contains a single
889896
// equality filter that is already represented by the given EquivGroups set.
890-
func areFiltersRedundant(equivs *props.EquivGroups, filters memo.FiltersExpr) bool {
897+
// notNullCols is the set of columns that are known to be non-null, either from
898+
// the logical properties of the join inputs, or from other filters.
899+
func areFiltersRedundant(
900+
equivs *props.EquivGroups, filters memo.FiltersExpr, notNullCols opt.ColSet,
901+
) bool {
891902
if len(filters) != 1 {
892903
return false
893904
}
@@ -900,6 +911,13 @@ func areFiltersRedundant(equivs *props.EquivGroups, filters memo.FiltersExpr) bo
900911
if !ok1 || !ok2 {
901912
return false
902913
}
914+
if !notNullCols.Contains(var1.Col) || !notNullCols.Contains(var2.Col) {
915+
// An equality between columns is not redundant if either column is
916+
// nullable because equality is null-rejecting. equivs allows for NULL
917+
// values of equivalent columns, so we must check for nullable columns
918+
// before checking for equivalence.
919+
return false
920+
}
903921
return equivs.AreColsEquiv(var1.Col, var2.Col)
904922
}
905923

pkg/sql/opt/xform/testdata/rules/join_order

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3558,7 +3558,7 @@ right-join (hash)
35583558

35593559
# Only 2 joins are considered (instead of 8) when the STRAIGHT hint is present in one join.
35603560
reorderjoins format=hide-all
3561-
SELECT *
3561+
SELECT *
35623562
FROM straight_join1
35633563
INNER STRAIGHT JOIN straight_join2 ON straight_join1.x = straight_join2.y
35643564
INNER JOIN straight_join3 ON straight_join1.x = straight_join3.z
@@ -3608,7 +3608,7 @@ inner-join (hash)
36083608

36093609
# No joins are considered when the STRAIGHT hint is present in both joins.
36103610
reorderjoins format=hide-all
3611-
SELECT *
3611+
SELECT *
36123612
FROM straight_join1
36133613
INNER STRAIGHT JOIN straight_join2 ON straight_join1.x = straight_join2.y
36143614
INNER STRAIGHT JOIN straight_join3 ON straight_join1.x = straight_join3.z
@@ -3628,3 +3628,50 @@ inner-join (hash)
36283628
├── scan straight_join3
36293629
└── filters
36303630
└── straight_join1.x = z
3631+
3632+
exec-ddl
3633+
CREATE TABLE t146507 (
3634+
a INT,
3635+
b INT
3636+
)
3637+
----
3638+
3639+
# Regression test for #146507. Join reordering must not eliminate the
3640+
# null-rejecting filter t.b:2 = t.b:2. There should be no inner-joins with empty
3641+
# filters.
3642+
memo disable=(SimplifySameVarEqualities,PushFilterIntoJoinLeft,HoistUnboundFilterFromExistsSubquery)
3643+
SELECT a FROM t146507 AS t WHERE b IN (SELECT t.b FROM t146507 AS t2)
3644+
----
3645+
memo (optimized, ~23KB, required=[presentation: a:1])
3646+
├── G1: (project G2 G3 a)
3647+
│ └── [presentation: a:1]
3648+
│ ├── best: (project G2 G3 a)
3649+
│ └── cost: 1130.15
3650+
├── G2: (semi-join G4 G5 G6) (project G7 G3 a b)
3651+
│ └── []
3652+
│ ├── best: (project G7 G3 a b)
3653+
│ └── cost: 1120.13
3654+
├── G3: (projections)
3655+
├── G4: (scan t146507 [as=t],cols=(1,2))
3656+
│ └── []
3657+
│ ├── best: (scan t146507 [as=t],cols=(1,2))
3658+
│ └── cost: 1078.52
3659+
├── G5: (scan t146507 [as=t2],cols=())
3660+
│ ├── [limit hint: 1.00]
3661+
│ │ ├── best: (scan t146507 [as=t2],cols=())
3662+
│ │ └── cost: 19.05
3663+
│ └── []
3664+
│ ├── best: (scan t146507 [as=t2],cols=())
3665+
│ └── cost: 1058.32
3666+
├── G6: (filters G8)
3667+
├── G7: (inner-join G4 G9 G6) (inner-join G9 G4 G6)
3668+
│ └── []
3669+
│ ├── best: (inner-join G4 G9 G6)
3670+
│ └── cost: 1110.11
3671+
├── G8: (eq G10 G10)
3672+
├── G9: (limit G5 G11) (scan t146507 [as=t2],cols=(),lim=1)
3673+
│ └── []
3674+
│ ├── best: (scan t146507 [as=t2],cols=(),lim=1)
3675+
│ └── cost: 9.04
3676+
├── G10: (variable t.b)
3677+
└── G11: (const 1)

0 commit comments

Comments
 (0)