Skip to content

Commit 62cfc3d

Browse files
committed
opt: convert parameterized lookup joins to placeholder scans
Parameterized lookup joins, which are used in generic query plans, are now converted to placeholder scans in some cases. These placeholder scans are more efficient because they cater to a narrower set of features than general-purpose lookup joins and because they natively support vectorization. See the new exploration rule for more details. Release note: None
1 parent 90ae675 commit 62cfc3d

File tree

13 files changed

+1780
-183
lines changed

13 files changed

+1780
-183
lines changed

pkg/sql/opt/exec/execbuilder/mutation.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1233,7 +1233,7 @@ func shouldApplyImplicitLockingToUpdateOrDeleteInput(
12331233
var toLockIndexes intsets.Fast
12341234
// Try to match the mutation's input expression against the pattern:
12351235
//
1236-
// [Project]* [IndexJoin] (Scan | LookupJoin [LookupJoin] Values)
1236+
// [Project]* [IndexJoin] (Scan | PlaceholderScan | LookupJoin [LookupJoin] Values)
12371237
//
12381238
// The IndexJoin will only be present if the base expression is a Scan, but
12391239
// making it an optional prefix to the LookupJoins makes the logic simpler.
@@ -1247,6 +1247,9 @@ func shouldApplyImplicitLockingToUpdateOrDeleteInput(
12471247
case *memo.ScanExpr:
12481248
toLockIndexes.Add(t.Index)
12491249
toLock = t.Table
1250+
case *memo.PlaceholderScanExpr:
1251+
toLockIndexes.Add(t.Index)
1252+
toLock = t.Table
12501253
case *memo.LookupJoinExpr:
12511254
toLockIndexes.Add(t.Index)
12521255
if innerJoin, ok := t.Input.(*memo.LookupJoinExpr); ok && innerJoin.Table == t.Table {

pkg/sql/opt/exec/execbuilder/testdata/delete

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -500,28 +500,20 @@ quality of service: regular
500500
│ from: t137352
501501
│ auto commit
502502
503-
└── • lookup join
504-
│ sql nodes: <hidden>
505-
│ kv nodes: <hidden>
506-
│ regions: <hidden>
507-
│ actual row count: 0
508-
│ KV time: 0µs
509-
│ KV rows decoded: 0
510-
│ KV bytes read: 0 B
511-
│ KV gRPC calls: 0
512-
│ execution time: 0µs
513-
│ estimated max memory allocated: 0 B
514-
│ table: t137352@t137352_pkey
515-
│ equality: ($1) = (k)
516-
│ equality cols are key
517-
│ locking strength: for update
518-
519-
└── • values
520-
sql nodes: <hidden>
521-
regions: <hidden>
522-
actual row count: 1
523-
execution time: 0µs
524-
size: 1 column, 1 row
503+
└── • scan
504+
sql nodes: <hidden>
505+
kv nodes: <hidden>
506+
regions: <hidden>
507+
actual row count: 0
508+
KV time: 0µs
509+
KV rows decoded: 0
510+
KV bytes read: 0 B
511+
KV gRPC calls: 0
512+
estimated max memory allocated: 0 B
513+
missing stats
514+
table: t137352@t137352_pkey
515+
spans: [/1 - /1]
516+
locking strength: for update
525517

526518
statement ok
527519
INSERT INTO t137352 VALUES (1, 10, 100);

pkg/sql/opt/exec/execbuilder/testdata/update

Lines changed: 28 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,28 +1025,20 @@ quality of service: regular
10251025
10261026
└── • render
10271027
1028-
└── • lookup join
1029-
│ sql nodes: <hidden>
1030-
│ kv nodes: <hidden>
1031-
│ regions: <hidden>
1032-
│ actual row count: 0
1033-
│ KV time: 0µs
1034-
│ KV rows decoded: 0
1035-
│ KV bytes read: 0 B
1036-
│ KV gRPC calls: 0
1037-
│ execution time: 0µs
1038-
│ estimated max memory allocated: 0 B
1039-
│ table: t137352@t137352_pkey
1040-
│ equality: ($1) = (k)
1041-
│ equality cols are key
1042-
│ locking strength: for update
1043-
1044-
└── • values
1045-
sql nodes: <hidden>
1046-
regions: <hidden>
1047-
actual row count: 1
1048-
execution time: 0µs
1049-
size: 1 column, 1 row
1028+
└── • scan
1029+
sql nodes: <hidden>
1030+
kv nodes: <hidden>
1031+
regions: <hidden>
1032+
actual row count: 0
1033+
KV time: 0µs
1034+
KV rows decoded: 0
1035+
KV bytes read: 0 B
1036+
KV gRPC calls: 0
1037+
estimated max memory allocated: 0 B
1038+
missing stats
1039+
table: t137352@t137352_pkey
1040+
spans: [/1 - /1]
1041+
locking strength: for update
10501042

10511043
statement ok
10521044
INSERT INTO t137352 VALUES (1, 2, 3);
@@ -1297,29 +1289,20 @@ quality of service: regular
12971289
│ equality: (k) = (a)
12981290
│ pred: a = 1
12991291
1300-
└── • lookup join
1301-
│ sql nodes: <hidden>
1302-
│ kv nodes: <hidden>
1303-
│ regions: <hidden>
1304-
│ actual row count: 1
1305-
│ KV time: 0µs
1306-
│ KV rows decoded: 1
1307-
│ KV pairs read: 2
1308-
│ KV bytes read: 8 B
1309-
│ KV gRPC calls: 1
1310-
│ execution time: 0µs
1311-
│ estimated max memory allocated: 0 B
1312-
│ estimated row count: 1
1313-
│ table: t137352@t137352_pkey
1314-
│ equality: ($1) = (k)
1315-
│ equality cols are key
1316-
1317-
└── • values
1318-
sql nodes: <hidden>
1319-
regions: <hidden>
1320-
actual row count: 1
1321-
execution time: 0µs
1322-
size: 1 column, 1 row
1292+
└── • scan
1293+
sql nodes: <hidden>
1294+
kv nodes: <hidden>
1295+
regions: <hidden>
1296+
actual row count: 1
1297+
KV time: 0µs
1298+
KV rows decoded: 1
1299+
KV pairs read: 2
1300+
KV bytes read: 8 B
1301+
KV gRPC calls: 1
1302+
estimated max memory allocated: 0 B
1303+
estimated row count: 1
1304+
table: t137352@t137352_pkey
1305+
spans: [/1 - /1]
13231306

13241307
statement ok
13251308
DELETE FROM t137352 WHERE true;

pkg/sql/opt/exec/execbuilder/testdata/virtual

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,4 +539,3 @@ vectorized: true
539539

540540
statement ok
541541
RESET disallow_full_table_scans;
542-

pkg/sql/opt/memo/logical_props_builder.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,15 @@ func (b *logicalPropsBuilder) buildScanProps(scan *ScanExpr, rel *props.Relation
203203
}
204204
}
205205

206+
// buildPlaceholderScanProps is unimplemented. PlaceholderScan expressions are only created
207+
// in two places:
208+
//
209+
// 1. The placeholder fast-path which entirely skips optimization.
210+
// 2. The ConvertParameterizedLookupJoinToPlaceholderScan exploration rule
211+
// which always adds the placeholder scan to an existing memo group, for
212+
// which logical properties have already been built.
213+
//
214+
// In both cases this function is never called.
206215
func (b *logicalPropsBuilder) buildPlaceholderScanProps(
207216
scan *PlaceholderScanExpr, rel *props.Relational,
208217
) {

pkg/sql/opt/norm/factory_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,8 @@ func TestCopyAndReplace(t *testing.T) {
8787

8888
if e, err := o.Optimize(); err != nil {
8989
t.Fatal(err)
90-
} else if e.Op() != opt.ProjectOp || e.Child(0).Op() != opt.LookupJoinOp {
91-
t.Errorf("expected optimizer to choose a (project (lookup-join)), not (%v (%v))",
92-
e.Op(), e.Child(0).Op())
90+
} else if e.Op() != opt.PlaceholderScanOp {
91+
t.Errorf("expected optimizer to choose a placeholder scan, not %v", e.Op())
9392
}
9493

9594
m := o.Factory().DetachMemo()

pkg/sql/opt/xform/coster.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,9 @@ func (c *coster) ComputeCost(candidate memo.RelExpr, required *physical.Required
540540
case opt.ScanOp:
541541
cost = c.computeScanCost(candidate.(*memo.ScanExpr), required)
542542

543+
case opt.PlaceholderScanOp:
544+
cost = c.computePlaceholderScanCost(candidate.(*memo.PlaceholderScanExpr), required)
545+
543546
case opt.SelectOp:
544547
cost = c.computeSelectCost(candidate.(*memo.SelectExpr), required)
545548

@@ -901,6 +904,50 @@ func (c *coster) computeScanCost(scan *memo.ScanExpr, required *physical.Require
901904
return cost
902905
}
903906

907+
// computePlaceholderScanCost computes the cost of a placeholder scan. It mimics
908+
// the logic in computeScanCost that is relevant for placeholder scans.
909+
func (c *coster) computePlaceholderScanCost(
910+
scan *memo.PlaceholderScanExpr, required *physical.Required,
911+
) memo.Cost {
912+
if !scan.Flags.Empty() {
913+
panic(errors.AssertionFailedf("expected empty flags for placeholder scan"))
914+
}
915+
916+
stats := scan.Relational().Statistics()
917+
rowCount := stats.RowCount
918+
const numSpans = 1 // A placeholder scan always has a single span.
919+
baseCost := memo.Cost{C: numSpans * randIOCostFactor}
920+
921+
// Add the IO cost of retrieving and the CPU cost of emitting the rows. The
922+
// row cost depends on the size of the columns scanned.
923+
perRowCost := c.rowScanCost(scan.Table, scan.Index, scan.Cols)
924+
925+
// If this is a virtual scan, add the cost of fetching table descriptors.
926+
if c.mem.Metadata().Table(scan.Table).IsVirtualTable() {
927+
baseCost.C += virtualScanTableDescriptorFetchCost
928+
}
929+
930+
// Add a penalty if the cardinality exceeds the row count estimate. Adding a
931+
// few rows worth of cost helps prevent surprising plans for very small tables
932+
// or for when stats are stale.
933+
//
934+
// Note: we add this to the baseCost rather than the rowCount, so that the
935+
// number of index columns does not have an outsized effect on the cost of
936+
// the scan. See issue #68556.
937+
baseCost.Add(c.largeCardinalityCostPenalty(scan.Relational().Cardinality, rowCount))
938+
939+
if required.LimitHint != 0 {
940+
rowCount = math.Min(rowCount, required.LimitHint)
941+
}
942+
943+
cost := baseCost
944+
cost.C += rowCount * (seqIOCostFactor + perRowCost.C)
945+
946+
// TODO(#148315): Consider adding distribution cost for RBR tables.
947+
cost.Add(SmallDistributeCost)
948+
return cost
949+
}
950+
904951
func distributionIsLocal(regionsAccessed physical.Distribution, evalCtx *eval.Context) bool {
905952
if len(regionsAccessed.Regions) == 1 {
906953
var localDist physical.Distribution

pkg/sql/opt/xform/generic_funcs.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/cockroachdb/cockroach/pkg/sql/sem/volatility"
1515
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
1616
"github.com/cockroachdb/cockroach/pkg/sql/types"
17+
"github.com/cockroachdb/errors"
1718
)
1819

1920
// GenericRulesEnabled returns true if rules for optimizing generic query plans
@@ -124,3 +125,64 @@ func (c *CustomFuncs) ParameterizedJoinPrivate() *memo.JoinPrivate {
124125
SkipReorderJoins: true,
125126
}
126127
}
128+
129+
// PlaceholderScanSpanAndPrivate returns a span and scan private for a
130+
// PlaceholderScan expression that is semantically equivalent to the given
131+
// lookup join with input values. See
132+
// ConvertParameterizedLookupJoinToPlaceholderScan for more details.
133+
func (c *CustomFuncs) PlaceholderScanSpanAndPrivate(
134+
lookupPrivate *memo.LookupJoinPrivate,
135+
values *memo.ValuesExpr,
136+
row memo.ScalarListExpr,
137+
outputCols opt.ColSet,
138+
) (span memo.ScalarListExpr, scanPrivate *memo.ScanPrivate, ok bool) {
139+
// The lookup join must be an inner join.
140+
if lookupPrivate.JoinType != opt.InnerJoinOp {
141+
return nil, nil, false
142+
}
143+
// The lookup join must only have key columns, no lookup expressions.
144+
if len(lookupPrivate.KeyCols) == 0 ||
145+
lookupPrivate.LookupExpr != nil ||
146+
lookupPrivate.RemoteLookupExpr != nil {
147+
return nil, nil, false
148+
}
149+
// The lookup join must not be part of a paired join.
150+
if lookupPrivate.IsFirstJoinInPairedJoiner || lookupPrivate.IsSecondJoinInPairedJoiner {
151+
return nil, nil, false
152+
}
153+
// The index must be able to produce all the output columns.
154+
md := c.e.f.Metadata()
155+
indexCols := md.TableMeta(lookupPrivate.Table).IndexColumns(lookupPrivate.Index)
156+
if !outputCols.SubsetOf(indexCols) {
157+
return nil, nil, false
158+
}
159+
160+
// Map columns in the input Values expression to the key columns.
161+
span = make(memo.ScalarListExpr, len(lookupPrivate.KeyCols))
162+
for i, keyCol := range lookupPrivate.KeyCols {
163+
for j, valCol := range values.Cols {
164+
if keyCol == valCol {
165+
if !verifyType(md, keyCol, row[j].DataType()) {
166+
// TODO(mgartner): This was added to copy the same check
167+
// made while planning the placeholder fast-path, but it
168+
// may not be necessary here because the lookup join may
169+
// have already checked this.
170+
return nil, nil, false
171+
}
172+
span[i] = row[j]
173+
break
174+
}
175+
}
176+
if span[i] == nil {
177+
panic(errors.AssertionFailedf("no value found for key column %d", keyCol))
178+
}
179+
}
180+
181+
scanPrivate = &memo.ScanPrivate{
182+
Table: lookupPrivate.Table,
183+
Index: lookupPrivate.Index,
184+
Cols: outputCols.Copy(),
185+
Locking: lookupPrivate.Locking,
186+
}
187+
return span, scanPrivate, true
188+
}

pkg/sql/opt/xform/rules/generic.opt

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,45 @@
5656
[]
5757
(OutputCols (Root))
5858
)
59+
60+
# ConvertParameterizedLookupJoinToPlaceholderScan converts some LookupJoins that
61+
# ultimately result from GenerateParameterizedJoin into PlaceholderScans. The
62+
# execution of PlaceholderScans are more efficient than more general-purpose
63+
# LookupJoins. They also natively support vectorized execution, helping to avoid
64+
# materialization and columnarization overhead.
65+
#
66+
# The rule only applies to inner LookupJoins where all of the following are
67+
# true:
68+
#
69+
# 1. Only key columns are used; no lookup expressions.
70+
# 2. There is no ON filter.
71+
# 3. The LookupJoin is not part of a paired join.
72+
# 4. The lookup index can produce all of the needed output columns.
73+
#
74+
[ConvertParameterizedLookupJoinToPlaceholderScan, Explore]
75+
(Project
76+
(LookupJoin
77+
$values:(Values [ (Tuple $row:[ ... ]) ])
78+
[]
79+
$lookupPrivate:*
80+
)
81+
[]
82+
$outputCols:* &
83+
(GenericRulesEnabled) &
84+
(HasPlaceholdersOrStableExprs $values) &
85+
(Let
86+
(
87+
$span
88+
$scanPrivate
89+
$ok
90+
):(PlaceholderScanSpanAndPrivate
91+
$lookupPrivate
92+
$values
93+
$row
94+
$outputCols
95+
)
96+
$ok
97+
)
98+
)
99+
=>
100+
(PlaceholderScan $span $scanPrivate)

0 commit comments

Comments
 (0)