Skip to content

Commit 48775d3

Browse files
craig[bot] and sgasho committed
Merge #146494
146494: opt: simplify x is not distinct from y to x = y r=mgartner a=sgasho

This commit adds a new select rule SimplifyIsCondition which rewrites expressions in the form `x IS NOT DISTINCT FROM y` to `x = y`. This transformation is only valid if all of the following are true:

1. The expression is in the context of filtering where NULL is falsy.
2. At least one of x or y is non-nullable. This is required because while the expression `NULL IS NOT DISTINCT FROM NULL` is true, `NULL = NULL` is NULL (falsy).
3. Neither x nor y is a tuple. Tuples with NULLs have all sorts of complicated edge cases, so we avoid them entirely. See #48299.

We conservatively also require the types of x and y to be identical. It may be possible to lift this restriction if we can prove that it is not necessary.

Fixes #144524

Release note (performance improvement): Some queries with filters in the form `x IS NOT DISTINCT FROM y` now have more optimal query plans.

Co-authored-by: sgasho <[email protected]>
2 parents db03023 + bb47c21 commit 48775d3

File tree

3 files changed

+202
-0
lines changed

3 files changed

+202
-0
lines changed

pkg/sql/opt/norm/general_funcs.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ func (c *CustomFuncs) IsInt(scalar opt.ScalarExpr) bool {
7171
return scalar.DataType().Family() == types.IntFamily
7272
}
7373

74+
// IsTuple returns true if the data type of the given scalar expression belongs
// to the tuple type family (types.TupleFamily).
75+
func (c *CustomFuncs) IsTuple(scalar opt.ScalarExpr) bool {
76+
return scalar.DataType().Family() == types.TupleFamily
77+
}
78+
7479
// BoolType returns the boolean SQL type.
7580
func (c *CustomFuncs) BoolType() *types.T {
7681
return types.Bool
@@ -107,6 +112,12 @@ func (c *CustomFuncs) TypeOf(e opt.ScalarExpr) *types.T {
107112
return e.DataType()
108113
}
109114

115+
// IdenticalTypes returns true if the left and right types are identical, as
116+
// reported by left.Identical(right). See (*types.T).Identical for the definition.
117+
func (c *CustomFuncs) IdenticalTypes(left, right *types.T) bool {
118+
return left.Identical(right)
119+
}
120+
110121
// IsConstArray returns true if the expression is a constant array.
111122
func (c *CustomFuncs) IsConstArray(scalar opt.ScalarExpr) bool {
112123
if cnst, ok := scalar.(*memo.ConstExpr); ok {
@@ -614,6 +625,13 @@ func (c *CustomFuncs) ExprIsNeverNull(e opt.ScalarExpr, notNullCols opt.ColSet)
614625
return memo.ExprIsNeverNull(e, notNullCols)
615626
}
616627

628+
// EitherExprIsNeverNull returns true if at least one of the scalar expressions
629+
// a or b is guaranteed to be non-NULL, given notNullCols, the set of outer
630+
// columns that are known to be not null. b is only checked if a may be NULL.
631+
func (c *CustomFuncs) EitherExprIsNeverNull(a, b opt.ScalarExpr, notNullCols opt.ColSet) bool {
632+
return memo.ExprIsNeverNull(a, notNullCols) || memo.ExprIsNeverNull(b, notNullCols)
633+
}
634+
617635
// sharedProps returns the shared logical properties for the given expression.
618636
// Only relational expressions and certain scalar list items (e.g. FiltersItem,
619637
// ProjectionsItem, AggregationsItem) have shared properties.

pkg/sql/opt/norm/rules/select.opt

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,48 @@ $input
354354
=>
355355
(Select $input [ (FiltersItem (False)) ])
356356

357+
# SimplifyIsCondition replaces the filter x IS NOT DISTINCT FROM y with x = y.
358+
# This transformation is only valid if all of the following are true:
359+
#
360+
# 1. The expression is a filter condition, a context in which NULL is falsy.
361+
# 2. At least one of x or y is non-nullable. This is required because while
362+
# the expression NULL IS NOT DISTINCT FROM NULL is true, NULL = NULL is NULL
363+
# (falsy).
364+
# 3. Neither x nor y is a tuple. Tuples with NULLs have all sorts of
365+
# complicated edge cases, so we avoid them entirely. See #48299.
366+
#
367+
# We also conservatively require the types of x and y to be identical. It may
368+
# be possible to lift this restriction if we can prove that it is not necessary.
369+
[SimplifyIsCondition, Normalize]
370+
(Select
371+
$input:*
372+
$filters:[
373+
...
374+
$item:(FiltersItem
375+
(Is
376+
$left:* & ^(IsTuple $left)
377+
$right:* &
378+
^(IsTuple $right) &
379+
(IdenticalTypes
380+
(TypeOf $left)
381+
(TypeOf $right)
382+
) &
383+
(EitherExprIsNeverNull
384+
$left
385+
$right
386+
(NotNullCols $input)
387+
)
388+
)
389+
)
390+
...
391+
]
392+
)
393+
=>
394+
(Select
395+
$input
396+
(ReplaceFiltersItem $filters $item (Eq $left $right))
397+
)
398+
357399
# PushSelectIntoProjectSet pushes filters into a ProjectSet. In particular,
358400
# the filters that are bound to the input columns of the ProjectSet are
359401
# pushed down into it, in hopes of being pushed down further into joins

pkg/sql/opt/norm/testdata/rules/select

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ exec-ddl
1414
CREATE TABLE c (a BOOL, b BOOL, c BOOL, d BOOL, e BOOL)
1515
----
1616

17+
exec-ddl
18+
CREATE TABLE d (k INT PRIMARY KEY, a INT NOT NULL, b INT, c INT, d FLOAT)
19+
----
20+
1721
exec-ddl
1822
CREATE TABLE e
1923
(
@@ -1638,6 +1642,144 @@ exec-ddl
16381642
DROP INDEX partial_idx
16391643
----
16401644

1645+
# --------------------------------------------------
1646+
# SimplifyIsCondition
1647+
# --------------------------------------------------
1648+
1649+
norm expect=SimplifyIsCondition
1650+
SELECT * FROM d WHERE a = 1 AND k IS NOT DISTINCT FROM 1 AND d = 1.0
1651+
----
1652+
select
1653+
├── columns: k:1!null a:2!null b:3 c:4 d:5!null
1654+
├── cardinality: [0 - 1]
1655+
├── key: ()
1656+
├── fd: ()-->(1-5)
1657+
├── scan d
1658+
│ ├── columns: k:1!null a:2!null b:3 c:4 d:5
1659+
│ ├── key: (1)
1660+
│ └── fd: (1)-->(2-5)
1661+
└── filters
1662+
├── a:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)]
1663+
├── k:1 = 1 [outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
1664+
└── d:5 = 1.0 [outer=(5), constraints=(/5: [/1.0 - /1.0]; tight), fd=()-->(5)]
1665+
1666+
# The rule applies if at least one of the operands is non-nullable.
1667+
norm expect=SimplifyIsCondition
1668+
SELECT * FROM d WHERE a IS NOT DISTINCT FROM b
1669+
----
1670+
select
1671+
├── columns: k:1!null a:2!null b:3!null c:4 d:5
1672+
├── key: (1)
1673+
├── fd: (1)-->(2-5), (2)==(3), (3)==(2)
1674+
├── scan d
1675+
│ ├── columns: k:1!null a:2!null b:3 c:4 d:5
1676+
│ ├── key: (1)
1677+
│ └── fd: (1)-->(2-5)
1678+
└── filters
1679+
└── a:2 = b:3 [outer=(2,3), constraints=(/2: (/NULL - ]; /3: (/NULL - ]), fd=(2)==(3), (3)==(2)]
1680+
1681+
# The rule does not apply if both operands are nullable.
1682+
norm expect-not=SimplifyIsCondition
1683+
SELECT * FROM d WHERE b IS NOT DISTINCT FROM NULL
1684+
----
1685+
select
1686+
├── columns: k:1!null a:2!null b:3 c:4 d:5
1687+
├── key: (1)
1688+
├── fd: ()-->(3), (1)-->(2,4,5)
1689+
├── scan d
1690+
│ ├── columns: k:1!null a:2!null b:3 c:4 d:5
1691+
│ ├── key: (1)
1692+
│ └── fd: (1)-->(2-5)
1693+
└── filters
1694+
└── b:3 IS NULL [outer=(3), constraints=(/3: [/NULL - /NULL]; tight), fd=()-->(3)]
1695+
1696+
norm expect-not=SimplifyIsCondition
1697+
SELECT * FROM d WHERE NULL IS NOT DISTINCT FROM b
1698+
----
1699+
select
1700+
├── columns: k:1!null a:2!null b:3 c:4 d:5
1701+
├── key: (1)
1702+
├── fd: ()-->(3), (1)-->(2,4,5)
1703+
├── scan d
1704+
│ ├── columns: k:1!null a:2!null b:3 c:4 d:5
1705+
│ ├── key: (1)
1706+
│ └── fd: (1)-->(2-5)
1707+
└── filters
1708+
└── b:3 IS NULL [outer=(3), constraints=(/3: [/NULL - /NULL]; tight), fd=()-->(3)]
1709+
1710+
norm expect-not=SimplifyIsCondition
1711+
SELECT * FROM d WHERE b IS NOT DISTINCT FROM c
1712+
----
1713+
select
1714+
├── columns: k:1!null a:2!null b:3 c:4 d:5
1715+
├── key: (1)
1716+
├── fd: (1)-->(2-5)
1717+
├── scan d
1718+
│ ├── columns: k:1!null a:2!null b:3 c:4 d:5
1719+
│ ├── key: (1)
1720+
│ └── fd: (1)-->(2-5)
1721+
└── filters
1722+
└── b:3 IS NOT DISTINCT FROM c:4 [outer=(3,4)]
1723+
1724+
norm expect-not=SimplifyIsCondition
1725+
SELECT * FROM d WHERE b IS NULL
1726+
----
1727+
select
1728+
├── columns: k:1!null a:2!null b:3 c:4 d:5
1729+
├── key: (1)
1730+
├── fd: ()-->(3), (1)-->(2,4,5)
1731+
├── scan d
1732+
│ ├── columns: k:1!null a:2!null b:3 c:4 d:5
1733+
│ ├── key: (1)
1734+
│ └── fd: (1)-->(2-5)
1735+
└── filters
1736+
└── b:3 IS NULL [outer=(3), constraints=(/3: [/NULL - /NULL]; tight), fd=()-->(3)]
1737+
1738+
# The rule does not apply if the operands are tuples.
1739+
norm expect-not=SimplifyIsCondition
1740+
SELECT * FROM d WHERE (k, a) IS NOT DISTINCT FROM (a, k)
1741+
----
1742+
select
1743+
├── columns: k:1!null a:2!null b:3 c:4 d:5
1744+
├── immutable
1745+
├── key: (1)
1746+
├── fd: (1)-->(2-5)
1747+
├── scan d
1748+
│ ├── columns: k:1!null a:2!null b:3 c:4 d:5
1749+
│ ├── key: (1)
1750+
│ └── fd: (1)-->(2-5)
1751+
└── filters
1752+
└── (k:1, a:2) IS NOT DISTINCT FROM (a:2, k:1) [outer=(1,2), immutable]
1753+
1754+
# The rule does not apply if the operands do not have identical types.
1755+
norm expect-not=SimplifyIsCondition
1756+
SELECT * FROM d WHERE a IS NOT DISTINCT FROM 1.23
1757+
----
1758+
select
1759+
├── columns: k:1!null a:2!null b:3 c:4 d:5
1760+
├── key: (1)
1761+
├── fd: (1)-->(2-5)
1762+
├── scan d
1763+
│ ├── columns: k:1!null a:2!null b:3 c:4 d:5
1764+
│ ├── key: (1)
1765+
│ └── fd: (1)-->(2-5)
1766+
└── filters
1767+
└── a:2 IS NOT DISTINCT FROM 1.23 [outer=(2)]
1768+
1769+
norm expect-not=SimplifyIsCondition
1770+
SELECT * FROM d WHERE a IS NOT DISTINCT FROM d
1771+
----
1772+
select
1773+
├── columns: k:1!null a:2!null b:3 c:4 d:5
1774+
├── key: (1)
1775+
├── fd: (1)-->(2-5)
1776+
├── scan d
1777+
│ ├── columns: k:1!null a:2!null b:3 c:4 d:5
1778+
│ ├── key: (1)
1779+
│ └── fd: (1)-->(2-5)
1780+
└── filters
1781+
└── a:2 IS NOT DISTINCT FROM d:5 [outer=(2,5)]
1782+
16411783
# --------------------------------------------------
16421784
# PushSelectIntoProjectSet
16431785
# --------------------------------------------------

0 commit comments

Comments
 (0)