Skip to content

Commit 7f608c4

Browse files
committed
When selecting an index, detect when one produces strictly fewer rows than another.
1 parent 3ca53a1 commit 7f608c4

File tree

1 file changed

+43
-13
lines changed

1 file changed

+43
-13
lines changed

sql/analyzer/costed_index_scan.go

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package analyzer
1616

1717
import (
1818
"fmt"
19+
"slices"
1920
"sort"
2021
"strings"
2122
"time"
@@ -203,6 +204,9 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta
203204
if !ok {
204205
stat, err = uniformDistStatisticsForIndex(ctx, statsProv, iat, idx)
205206
}
207+
if err != nil {
208+
return nil, nil, nil, err
209+
}
206210
err := c.cost(root, stat, idx)
207211
if err != nil {
208212
return nil, nil, nil, err
@@ -446,6 +450,8 @@ type indexCoster struct {
446450
// prefix key of the best indexScan
447451
bestPrefix int
448452
underlyingName string
453+
// whether the column following the prefix key is limited to a subrange
454+
hasRange bool
449455
}
450456

451457
// cost tries to build the lowest cardinality index scan for an expression
@@ -459,10 +465,11 @@ func (c *indexCoster) cost(f indexFilter, stat sql.Statistic, idx sql.Index) err
459465
var prefix int
460466
var err error
461467
var ok bool
468+
hasRange := false
462469

463470
switch f := f.(type) {
464471
case *iScanAnd:
465-
newHist, newFds, filters, prefix, err = c.costIndexScanAnd(c.ctx, f, stat, stat.Histogram(), ordinals, idx)
472+
newHist, newFds, filters, prefix, hasRange, err = c.costIndexScanAnd(c.ctx, f, stat, stat.Histogram(), ordinals, idx)
466473
if err != nil {
467474
return err
468475
}
@@ -491,12 +498,12 @@ func (c *indexCoster) cost(f indexFilter, stat sql.Statistic, idx sql.Index) err
491498
newFds = &sql.FuncDepSet{}
492499
}
493500

494-
c.updateBest(stat, newHist, newFds, filters, prefix)
501+
c.updateBest(stat, newHist, newFds, filters, prefix, hasRange)
495502

496503
return nil
497504
}
498505

499-
func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fds *sql.FuncDepSet, filters sql.FastIntSet, prefix int) {
506+
func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fds *sql.FuncDepSet, filters sql.FastIntSet, prefix int, hasRange bool) {
500507
if s == nil || filters.Len() == 0 {
501508
return
502509
}
@@ -510,6 +517,7 @@ func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fd
510517
c.bestCnt = rowCnt
511518
c.bestFilters = filters
512519
c.bestPrefix = prefix
520+
c.hasRange = hasRange
513521
}
514522
}()
515523

@@ -534,6 +542,26 @@ func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fd
534542
return
535543
}
536544

545+
// If one index uses a strict superset of the filters of the other, we should always pick the superset.
546+
// This is true even if the index with more filters isn't unique.
547+
if prefix > c.bestPrefix && slices.Equal(c.bestStat.Columns()[:c.bestPrefix], s.Columns()[:c.bestPrefix]) {
548+
update = true
549+
return
550+
}
551+
552+
if prefix == c.bestPrefix && slices.Equal(c.bestStat.Columns()[:c.bestPrefix], s.Columns()[:c.bestPrefix]) && hasRange && !c.hasRange {
553+
update = true
554+
return
555+
}
556+
557+
if c.bestPrefix > prefix && slices.Equal(c.bestStat.Columns()[:prefix], s.Columns()[:prefix]) {
558+
return
559+
}
560+
561+
if c.bestPrefix == prefix && slices.Equal(c.bestStat.Columns()[:prefix], s.Columns()[:prefix]) && !hasRange && c.hasRange {
562+
return
563+
}
564+
537565
bestKey, bok := best.StrictKey()
538566
cmpKey, cok := cmp.StrictKey()
539567
if cok && !bok {
@@ -575,6 +603,10 @@ func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fd
575603
return
576604
}
577605

606+
if filters.Len() < c.bestFilters.Len() {
607+
return
608+
}
609+
578610
if s.ColSet().Len()-filters.Len() < c.bestStat.ColSet().Len()-c.bestFilters.Len() {
579611
// prefer 1 range filter over 1 column index (1 - 1 = 0)
580612
// vs. 1 range filter over 2 column index (2 - 1 = 1)
@@ -1199,7 +1231,7 @@ func ordinalsForStat(stat sql.Statistic) map[string]int {
11991231
// updated statistic, the subset of applicable filters, the maximum prefix
12001232
// key created by a subset of equality filters (from conjunction only),
12011233
// whether the column following that prefix key is limited to a subrange, or an error if applicable.
1202-
func (c *indexCoster) costIndexScanAnd(ctx *sql.Context, filter *iScanAnd, s sql.Statistic, buckets []sql.HistogramBucket, ordinals map[string]int, idx sql.Index) ([]sql.HistogramBucket, *sql.FuncDepSet, sql.FastIntSet, int, error) {
1234+
func (c *indexCoster) costIndexScanAnd(ctx *sql.Context, filter *iScanAnd, s sql.Statistic, buckets []sql.HistogramBucket, ordinals map[string]int, idx sql.Index) ([]sql.HistogramBucket, *sql.FuncDepSet, sql.FastIntSet, int, bool, error) {
12031235
// first step finds the conjunctions that match index prefix columns.
12041236
// we divide into eqFilters and rangeFilters
12051237

@@ -1210,13 +1242,13 @@ func (c *indexCoster) costIndexScanAnd(ctx *sql.Context, filter *iScanAnd, s sql
12101242
for _, or := range filter.orChildren {
12111243
childStat, _, ok, err := c.costIndexScanOr(or.(*iScanOr), s, buckets, ordinals, idx)
12121244
if err != nil {
1213-
return nil, nil, sql.FastIntSet{}, 0, err
1245+
return nil, nil, sql.FastIntSet{}, 0, false, err
12141246
}
12151247
// if valid, INTERSECT
12161248
if ok {
12171249
ret, err = stats.Intersect(c.ctx, ret, childStat, s.Types())
12181250
if err != nil {
1219-
return nil, nil, sql.FastIntSet{}, 0, err
1251+
return nil, nil, sql.FastIntSet{}, 0, false, err
12201252
}
12211253
exact.Add(int(or.Id()))
12221254
}
@@ -1237,12 +1269,8 @@ func (c *indexCoster) costIndexScanAnd(ctx *sql.Context, filter *iScanAnd, s sql
12371269
conjFDs = conj.getFds()
12381270
}
12391271

1240-
if exact.Len()+conj.applied.Len() == filter.childCnt() {
1241-
// matched all filters
1242-
return conj.hist, conjFDs, sql.NewFastIntSet(int(filter.id)), conj.missingPrefix, nil
1243-
}
1244-
1245-
return conj.hist, conjFDs, exact.Union(conj.applied), conj.missingPrefix, nil
1272+
hasRange := conj.ineqCols.Contains(conj.missingPrefix)
1273+
return conj.hist, conjFDs, exact.Union(conj.applied), conj.missingPrefix, hasRange, nil
12461274
}
12471275

12481276
func (c *indexCoster) costIndexScanOr(filter *iScanOr, s sql.Statistic, buckets []sql.HistogramBucket, ordinals map[string]int, idx sql.Index) ([]sql.HistogramBucket, *sql.FuncDepSet, bool, error) {
@@ -1253,7 +1281,7 @@ func (c *indexCoster) costIndexScanOr(filter *iScanOr, s sql.Statistic, buckets
12531281
for _, child := range filter.children {
12541282
switch child := child.(type) {
12551283
case *iScanAnd:
1256-
childBuckets, _, ids, _, err := c.costIndexScanAnd(c.ctx, child, s, buckets, ordinals, idx)
1284+
childBuckets, _, ids, _, _, err := c.costIndexScanAnd(c.ctx, child, s, buckets, ordinals, idx)
12571285
if err != nil {
12581286
return nil, nil, false, err
12591287
}
@@ -1664,6 +1692,7 @@ type conjCollector struct {
16641692
ordinals map[string]int
16651693
missingPrefix int
16661694
constant sql.FastIntSet
1695+
ineqCols sql.FastIntSet
16671696
eqVals []interface{}
16681697
nullable []bool
16691698
applied sql.FastIntSet
@@ -1732,6 +1761,7 @@ func (c *conjCollector) addEq(ctx *sql.Context, col string, val interface{}, nul
17321761

17331762
func (c *conjCollector) addIneq(ctx *sql.Context, op IndexScanOp, col string, val interface{}) error {
17341763
ord := c.ordinals[col]
1764+
c.ineqCols.Add(ord)
17351765
if ord > 0 {
17361766
return nil
17371767
}

0 commit comments

Comments
 (0)