Skip to content

Commit a5d46f4

Browse files
committed
addressed jason's comments
1 parent 12ad271 commit a5d46f4

File tree

2 files changed

+20
-15
lines changed

2 files changed

+20
-15
lines changed

memory/table.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1665,6 +1665,10 @@ func (t *IndexedTable) LookupPartitions(ctx *sql.Context, lookup sql.IndexLookup
16651665
}
16661666

16671667
func adjustRangeScanFilterForIndexLookup(filter sql.Expression, index *Index) sql.Expression {
1668+
if filter == nil {
1669+
return filter
1670+
}
1671+
16681672
exprs := index.ExtendedExprs()
16691673

16701674
indexStorageSchema := make(sql.Schema, len(exprs))
@@ -1674,10 +1678,6 @@ func adjustRangeScanFilterForIndexLookup(filter sql.Expression, index *Index) sq
16741678
}
16751679
}
16761680

1677-
if filter == nil {
1678-
return filter
1679-
}
1680-
16811681
filter, _, err := transform.Expr(filter, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) {
16821682
if gf, ok := e.(*expression.GetField); ok {
16831683
idxIdx := indexStorageSchema.IndexOfColName(gf.Name())

sql/memo/coster.go

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -121,16 +121,19 @@ func (c *coster) costRel(ctx *sql.Context, n RelExpr, s sql.StatsProvider) (floa
121121
// TODO: estimate memory overhead
122122
return float64(lTableScan+rTableScan)*(seqIOCostFactor+cpuCostFactor) + cpuCostFactor*selfJoinCard, nil
123123
case jp.Op.IsLookup():
124-
// TODO added overhead for right lookups
125124
switch n := n.(type) {
126125
case *LookupJoin:
127-
if n.Injective {
126+
// Match rate is the proportion of right-side rows we expect to read for each left row
127+
matchRate := lookupJoinMatchRate(n.Lookup, n.JoinBase)
128+
129+
// If the LookupJoin is injective (a match rate of 0 also signals an injective lookup), there is at most one right lookup per left row
130+
if n.Injective || matchRate == 0 {
128131
return lBest*seqIOCostFactor + lBest*randIOCostFactor, nil
129132
}
130-
sel := lookupJoinSelectivity(n.Lookup, n.JoinBase)
131-
expectedRightRows := selfJoinCard * sel
132-
// read the whole left table and randIO into table equivalent to
133-
// this join's output cardinality estimate
133+
134+
// The total expected number of right row lookups
135+
expectedRightRows := lBest * matchRate
136+
// Estimate for reading each left row and each expected right row
134137
return lBest*seqIOCostFactor + expectedRightRows*randIOCostFactor, nil
135138
case *ConcatJoin:
136139
return c.costConcatJoin(ctx, n, s)
@@ -227,15 +230,17 @@ func (c *coster) costConcatJoin(_ *sql.Context, n *ConcatJoin, _ sql.StatsProvid
227230
var sel float64
228231
for _, l := range n.Concat {
229232
lookup := l
230-
sel += lookupJoinSelectivity(lookup, n.JoinBase)
233+
sel += lookupJoinMatchRate(lookup, n.JoinBase)
231234
}
232235
return l*sel*concatCostFactor*(randIOCostFactor+cpuCostFactor) - float64(n.Right.RelProps.GetStats().RowCount())*seqIOCostFactor, nil
233236
}
234237

235-
// lookupJoinSelectivity estimates the selectivity of a join condition with n lhs rows and m rhs rows.
236-
// A join with a selectivity of k will return k*(n*m) rows.
237-
// Special case: A join with a selectivity of 0 will return n rows.
238-
func lookupJoinSelectivity(l *IndexScan, joinBase *JoinBase) float64 {
238+
// lookupJoinMatchRate returns a heuristic estimate of the proportion of right-side rows matched
239+
// for each left-side row in a LookupJoin. Lower values indicate higher selectivity (i.e., more filtering).
240+
//
241+
// Special case: If the lookup is injective (i.e., at most one match per left row), we return 0 to
242+
// indicate that join cardinality is ≤ the left-side cardinality.
243+
func lookupJoinMatchRate(l *IndexScan, joinBase *JoinBase) float64 {
239244
if isInjectiveLookup(l.Index, joinBase, l.Table.Expressions(), l.Table.NullMask()) {
240245
return 0
241246
}

0 commit comments

Comments
 (0)