Skip to content

Commit 08db44e

Browse files
craig[bot]mgartner
andcommitted
Merge #153423
153423: opt: decouple LOS costing from optimizer r=mgartner a=mgartner In #93377 the coster and the optimizer were coupled together when cost scaling for the remote branch of a locality optimized search was hard-coded into the optimizer. This coupling blurs the responsibilities of the optimizer and coster and defies their design. `Coster` mentions that it "is an interface so that different costing algorithms can be used by the optimizer". To fix this, a `RemoteBranch` field has been added to physical properties and the coster uses this field to perform cost scaling. There should be no difference in end-user behavior due to this change. Release note: None Co-authored-by: Marcus Gartner <[email protected]>
2 parents c9ce09a + c10c3d5 commit 08db44e

File tree

8 files changed

+61
-33
lines changed

8 files changed

+61
-33
lines changed

pkg/sql/opt/exec/execbuilder/testdata/explain_redact

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3546,12 +3546,12 @@ memo (optimized, ~8KB, required=[presentation: info:9] [distribution: test])
35463546
│ ├── [ordering: +8 opt(2)]
35473547
│ │ ├── best: (sort G3)
35483548
│ │ └── cost: 1398.52
3549+
│ ├── [ordering: +8 opt(2)] [distribution: test] [limit hint: 3.00]
3550+
│ │ ├── best: (distribute G3="[ordering: +8 opt(2)]")
3551+
│ │ └── cost: 1598.54
35493552
│ ├── [ordering: +8 opt(2)] [limit hint: 3.00]
35503553
│ │ ├── best: (sort G3)
35513554
│ │ └── cost: 1398.52
3552-
│ ├── [ordering: +8 opt(2)] [limit hint: 3.00] [distribution: test]
3553-
│ │ ├── best: (distribute G3="[ordering: +8 opt(2)]")
3554-
│ │ └── cost: 1598.54
35553555
│ └── []
35563556
│ ├── best: (project G5 G6 c d)
35573557
│ └── cost: 1149.04

pkg/sql/opt/memo/interner.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -644,8 +644,9 @@ func (h *hasher) HashPhysProps(val *physical.Required) {
644644
}
645645
}
646646
h.HashOrderingChoice(val.Ordering)
647-
h.HashFloat64(val.LimitHint)
648647
h.HashDistribution(val.Distribution)
648+
h.HashFloat64(val.LimitHint)
649+
h.HashBool(val.RemoteBranch)
649650
}
650651

651652
func (h *hasher) HashDistribution(val physical.Distribution) {

pkg/sql/opt/memo/interner_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,6 +963,16 @@ func TestInternerPhysProps(t *testing.T) {
963963
LimitHint: 1,
964964
Distribution: physical.Distribution{Regions: []string{"us-east", "us-west"}},
965965
}
966+
physProps10 := physical.Required{
967+
Presentation: physical.Presentation{{Alias: "c", ID: 1}},
968+
Ordering: props.ParseOrderingChoice("+(1|2),+3 opt(4,5)"),
969+
RemoteBranch: true,
970+
}
971+
physProps11 := physical.Required{
972+
Presentation: physical.Presentation{{Alias: "c", ID: 1}},
973+
Ordering: props.ParseOrderingChoice("+(1|2),+3 opt(4,5)"),
974+
RemoteBranch: true,
975+
}
966976

967977
testCases := []struct {
968978
phys *physical.Required
@@ -978,6 +988,8 @@ func TestInternerPhysProps(t *testing.T) {
978988
{phys: &physProps7, inCache: false},
979989
{phys: &physProps8, inCache: false},
980990
{phys: &physProps9, inCache: true},
991+
{phys: &physProps10, inCache: false},
992+
{phys: &physProps11, inCache: true},
981993
}
982994

983995
inCache := make(map[*physical.Required]bool)

pkg/sql/opt/props/physical/required.go

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@ type Required struct {
4141
// is required or provided.
4242
Ordering props.OrderingChoice
4343

44+
// Distribution specifies the physical distribution of result rows. This is
45+
// defined as the set of regions that may contain result rows. If
46+
// Distribution is not defined, then no particular distribution is required.
47+
// Currently, the only operator in a plan tree that has a required
48+
// distribution is the root, since data must always be returned to the gateway
49+
// region.
50+
Distribution Distribution
51+
4452
// LimitHint specifies a "soft limit" to the number of result rows that may
4553
// be required of the expression. If requested, an expression will still need
4654
// to return all result rows, but it can be optimized based on the assumption
@@ -50,13 +58,10 @@ type Required struct {
5058
// using LimitHintInt64.
5159
LimitHint float64
5260

53-
// Distribution specifies the physical distribution of result rows. This is
54-
// defined as the set of regions that may contain result rows. If
55-
// Distribution is not defined, then no particular distribution is required.
56-
// Currently, the only operator in a plan tree that has a required
57-
// distribution is the root, since data must always be returned to the gateway
58-
// region.
59-
Distribution Distribution
61+
// RemoteBranch signals that the expression is on the remote side of a
62+
// locality-optimized search. If the local branch fulfills the query, the
63+
// remote branch is not executed.
64+
RemoteBranch bool
6065
}
6166

6267
// MinRequired are the default physical properties that require nothing and
@@ -66,7 +71,8 @@ var MinRequired = &Required{}
6671
// Defined is true if any physical property is defined. If none is defined, then
6772
// this is an instance of MinRequired.
6873
func (p *Required) Defined() bool {
69-
return !p.Presentation.Any() || !p.Ordering.Any() || p.LimitHint != 0 || !p.Distribution.Any()
74+
return !p.Presentation.Any() || !p.Ordering.Any() || !p.Distribution.Any() ||
75+
p.LimitHint != 0 || p.RemoteBranch
7076
}
7177

7278
// ColSet returns the set of columns used by any of the physical properties.
@@ -97,11 +103,14 @@ func (p *Required) String() string {
97103
if !p.Ordering.Any() {
98104
output("ordering", p.Ordering.Format)
99105
}
106+
if !p.Distribution.Any() {
107+
output("distribution", p.Distribution.format)
108+
}
100109
if p.LimitHint != 0 {
101110
output("limit hint", func(buf *bytes.Buffer) { fmt.Fprintf(buf, "%.2f", p.LimitHint) })
102111
}
103-
if !p.Distribution.Any() {
104-
output("distribution", p.Distribution.format)
112+
if p.RemoteBranch {
113+
output("remote branch", func(buf *bytes.Buffer) { buf.WriteString("true") })
105114
}
106115

107116
// Handle empty properties case.
@@ -114,7 +123,8 @@ func (p *Required) String() string {
114123
// Equals returns true if the two physical properties are identical.
115124
func (p *Required) Equals(rhs *Required) bool {
116125
return p.Presentation.Equals(rhs.Presentation) && p.Ordering.Equals(&rhs.Ordering) &&
117-
p.LimitHint == rhs.LimitHint && p.Distribution.Equals(rhs.Distribution)
126+
p.Distribution.Equals(rhs.Distribution) &&
127+
p.LimitHint == rhs.LimitHint && p.RemoteBranch == rhs.RemoteBranch
118128
}
119129

120130
// LimitHintInt64 returns the limit hint converted to an int64.

pkg/sql/opt/xform/coster.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,18 @@ func (c *coster) ComputeCost(candidate memo.RelExpr, required *physical.Required
625625
// preferable, all else being equal.
626626
cost.C += cpuCostFactor
627627

628+
// Within a locality optimized search, distribution costs are added to the
629+
// remote branch, but not the local branch. Scale the remote branch costs by
630+
// a factor reflecting the likelihood of executing that branch. Right now
631+
// this probability is not estimated, so just use a default probability of
632+
// 1/10.
633+
// TODO(msirek): Add an estimation of the probability of executing the
634+
// remote branch, e.g., compare the size of the limit hint with the expected
635+
// row count of the local branch. Is there a better approach?
636+
if required.RemoteBranch {
637+
cost.C /= 10
638+
}
639+
628640
// Add a one-time cost for any operator with unbounded cardinality. This
629641
// ensures we prefer plans that push limits as far down the tree as possible,
630642
// all else being equal.

pkg/sql/opt/xform/optimizer.go

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -618,21 +618,7 @@ func (o *Optimizer) optimizeGroupMember(
618618
childCost, childOptimized := o.optimizeExpr(member.Child(i), childRequired)
619619

620620
// Accumulate cost of children.
621-
if member.Op() == opt.LocalityOptimizedSearchOp && i > 0 {
622-
// If the child ops are locality optimized, distribution costs are added
623-
// to the remote branch, but not the local branch. Scale the remote
624-
// branch costs by a factor reflecting the likelihood of executing that
625-
// branch. Right now this probability is not estimated, so just use a
626-
// default probability of 1/10.
627-
// TODO(msirek): Add an estimation of the probability of executing the
628-
// remote branch, e.g., compare the size of the limit hint
629-
// with the expected row count of the local branch.
630-
// Is there a better approach?
631-
childCost.C /= 10
632-
cost.Add(childCost)
633-
} else {
634-
cost.Add(childCost)
635-
}
621+
cost.Add(childCost)
636622

637623
// If any child expression is not fully optimized, then the parent
638624
// expression is also not fully optimized.

pkg/sql/opt/xform/physical_props.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ func BuildChildPhysicalProps(
8989

9090
childProps.Ordering = ordering.BuildChildRequired(mem, parent, &parentProps.Ordering, nth)
9191
childProps.Distribution = distribution.BuildChildRequired(parent, &parentProps.Distribution, nth)
92+
childProps.RemoteBranch = parentProps.RemoteBranch
9293

9394
switch parent.Op() {
9495
case opt.LimitOp:
@@ -114,11 +115,17 @@ func BuildChildPhysicalProps(
114115
childProps.LimitHint = parentProps.LimitHint
115116

116117
case opt.ExceptOp, opt.ExceptAllOp, opt.IntersectOp, opt.IntersectAllOp,
117-
opt.UnionOp, opt.UnionAllOp, opt.LocalityOptimizedSearchOp:
118+
opt.UnionOp, opt.UnionAllOp:
118119
// TODO(celine): Set operation limits need further thought; for example,
119120
// the right child of an ExceptOp should not be limited.
120121
childProps.LimitHint = parentProps.LimitHint
121122

123+
case opt.LocalityOptimizedSearchOp:
124+
childProps.LimitHint = parentProps.LimitHint
125+
if nth == 1 {
126+
childProps.RemoteBranch = true
127+
}
128+
122129
case opt.DistinctOnOp:
123130
distinctCount := parent.Relational().Statistics().RowCount
124131
if parentProps.LimitHint > 0 {

pkg/sql/opt/xform/testdata/coster/zone

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,7 @@ locality-optimized-search
873873
├── constraint: /13/15/16: [/'west'/1/'foo' - /'west'/1/'foo']
874874
├── cardinality: [0 - 1]
875875
├── stats: [rows=1, distinct(13)=1, null(13)=0, distinct(15)=1, null(15)=0, distinct(16)=1, null(16)=0, distinct(13,15,16)=1, null(13,15,16)=0]
876-
├── cost: 5.17
876+
├── cost: 0.517
877877
├── key: ()
878878
├── fd: ()-->(13-16)
879879
└── prune: (13-16)
@@ -931,7 +931,7 @@ anti-join (lookup abc_part@bc_idx [as=a2])
931931
│ │ ├── constraint: /20/22/23: [/'west'/1/'foo' - /'west'/1/'foo']
932932
│ │ ├── cardinality: [0 - 1]
933933
│ │ ├── stats: [rows=1, distinct(20)=1, null(20)=0, distinct(22)=1, null(22)=0, distinct(23)=1, null(23)=0, distinct(20,22,23)=1, null(20,22,23)=0]
934-
│ │ ├── cost: 5.17
934+
│ │ ├── cost: 0.517
935935
│ │ ├── key: ()
936936
│ │ └── fd: ()-->(20-23)
937937
│ └── filters (true)

0 commit comments

Comments
 (0)