Skip to content

Commit 5c276ae

Browse files
author
Mark Sirek
committed
xform: compute distribution of a lookup join with a lookup join as input
The adjustment to optimizer costing of lookup joins, added in v23.1, to account for non-local input relation distribution (among the regions in a multiregion database) did not account for the case where the input to the lookup join is itself a lookup join. This resulted in an extra distribution cost being added to some best-cost lookup joins, causing the optimizer to no longer select them. The issue is in function `getCRBDRegionColFromInput`, which only looks for scan and locality-optimized scan operations as inputs. It is now updated with a recursive call to itself plus a call to `BuildLookupJoinLookupTableDistribution`, so that the proper distribution of a chain of lookup joins, plus the associated `crdb_region` column in the lookup table, can be determined. Function `BuildLookupJoinLookupTableDistribution` is updated to return the first lookup index column, when it is matched with the input's `crdb_region` column, to facilitate this behavior. Fixes: cockroachdb#105942 Release note (bug fix): This patch fixes an optimizer costing bug introduced in v23.1 which could cause a query involving two or more joins with REGIONAL BY ROW tables to not pick the optimal lookup joins.
1 parent 60b251e commit 5c276ae

File tree

5 files changed

+119
-15
lines changed

5 files changed

+119
-15
lines changed

pkg/ccl/logictestccl/testdata/logic_test/multi_region_remote_access_error

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ locality-optimized-search
639639
# Locality optimized search with lookup join will be supported in phase 2 or 3
640640
# when we can dynamically determine if the lookup will access a remote region.
641641
retry
642-
statement error pq: Query has no home region\. Try adding a filter on o\.crdb_region and/or on key column \(o\.id\)\. For more information, see https://www.cockroachlabs.com/docs/stable/cost-based-optimizer.html#control-whether-queries-are-limited-to-a-single-region
642+
statement error pq: Query has no home region\. Try adding a filter on o\.crdb_region and/or on key column \(o\.cust_id\)\. For more information, see https://www.cockroachlabs.com/docs/stable/cost-based-optimizer.html#control-whether-queries-are-limited-to-a-single-region
643643
SELECT * FROM customers c JOIN orders o ON c.id = o.cust_id AND
644644
(c.crdb_region = o.crdb_region) WHERE c.id = '69a1c2c2-5b18-459e-94d2-079dc53a4dd0'
645645

pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_query_behavior

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3554,3 +3554,91 @@ EXPLAIN (VEC) SELECT * FROM child LEFT JOIN parent ON p_id = c_p_id WHERE c_id =
35543554

35553555
statement ok
35563556
RESET vectorize
3557+
3558+
statement ok
3559+
CREATE TABLE abc (
3560+
id UUID PRIMARY KEY,
3561+
id1 UUID NOT NULL,
3562+
created_at TIMESTAMP NOT NULL,
3563+
updated_at TIMESTAMP NOT NULL,
3564+
id2 UUID NULL,
3565+
INDEX (id1 ASC, id2 ASC)
3566+
) LOCALITY REGIONAL BY ROW;
3567+
3568+
statement ok
3569+
CREATE TABLE xyz (
3570+
id UUID PRIMARY KEY,
3571+
str STRING NOT NULL,
3572+
abc_id UUID NOT NULL,
3573+
id2 UUID NULL,
3574+
FOREIGN KEY (abc_id) REFERENCES abc(id) ON DELETE CASCADE,
3575+
INDEX (abc_id ASC, id2 ASC),
3576+
INDEX (id2 ASC, str ASC, abc_id ASC)
3577+
) LOCALITY REGIONAL BY ROW;
3578+
3579+
statement ok
3580+
ALTER TABLE abc INJECT STATISTICS '[
3581+
{
3582+
"avg_size": 3000,
3583+
"columns": ["id"],
3584+
"distinct_count": 100,
3585+
"row_count": 100,
3586+
"created_at": "2018-01-01 1:00:00.00000+00:00"
3587+
}
3588+
]';
3589+
3590+
statement ok
3591+
ALTER TABLE xyz INJECT STATISTICS '[
3592+
{
3593+
"avg_size": 3000,
3594+
"columns": ["id"],
3595+
"distinct_count": 100,
3596+
"row_count": 100,
3597+
"created_at": "2018-01-01 1:00:00.00000+00:00"
3598+
}
3599+
]';
3600+
3601+
# Regression test for #105942
3602+
# The following should produce 2 lookup joins with a local distribution.
3603+
query T retry
3604+
EXPLAIN SELECT
3605+
xyz.str,
3606+
abc.id,
3607+
abc.id1,
3608+
abc.id2,
3609+
abc.created_at,
3610+
abc.updated_at
3611+
FROM
3612+
abc JOIN xyz ON
3613+
xyz.abc_id = abc.id
3614+
AND xyz.id2 = abc.id2
3615+
AND xyz.crdb_region = abc.crdb_region
3616+
WHERE
3617+
abc.id1 = '6da4f356-e526-4b78-b9f9-bbb1a7fc12d6'
3618+
AND abc.id2 = '68088706-02c6-47d1-b993-a421cd761f2b'
3619+
AND abc.crdb_region = 'ap-southeast-2'
3620+
AND xyz.crdb_region = 'ap-southeast-2';
3621+
----
3622+
distribution: local
3623+
vectorized: true
3624+
·
3625+
• lookup join
3626+
│ estimated row count: 1
3627+
│ table: abc@abc_pkey
3628+
│ equality: (crdb_region, id) = (crdb_region,id)
3629+
│ equality cols are key
3630+
3631+
└── • lookup join
3632+
│ estimated row count: 0
3633+
│ table: abc@abc_id1_id2_idx
3634+
│ equality: (crdb_region, lookup_join_const_col_@2, id2, abc_id) = (crdb_region,id1,id2,id)
3635+
│ equality cols are key
3636+
│ pred: (id2 = '68088706-02c6-47d1-b993-a421cd761f2b') AND (crdb_region = 'ap-southeast-2')
3637+
3638+
└── • render
3639+
3640+
└── • scan
3641+
estimated row count: 4 (3.7% of the table; stats collected <hidden> ago)
3642+
table: xyz@xyz_id2_str_abc_id_idx
3643+
spans: [/'ap-southeast-2'/'68088706-02c6-47d1-b993-a421cd761f2b' - /'ap-southeast-2'/'68088706-02c6-47d1-b993-a421cd761f2b']
3644+

pkg/sql/opt/distribution/distribution.go

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -155,25 +155,33 @@ func GetDEnumAsStringFromConstantExpr(expr opt.Expr) (enumAsString string, ok bo
155155
// of the input REGIONAL BY ROW table holding the crdb_region column, and
156156
// inputDistribution is the distribution of the operation on that table
157157
// (Scan or LocalityOptimizedSearch).
158+
// The distribution of the lookup join is returned, plus the first lookup index
159+
// column, as matched in the lookup with crdbRegionColID (if it can be
160+
// determined, otherwise zero).
158161
func BuildLookupJoinLookupTableDistribution(
159162
ctx context.Context,
160163
evalCtx *eval.Context,
161164
lookupJoin *memo.LookupJoinExpr,
162165
crdbRegionColID opt.ColumnID,
163166
inputDistribution physical.Distribution,
164-
) (provided physical.Distribution) {
167+
) (firstLookupIndexCol opt.ColumnID, provided physical.Distribution) {
165168
lookupTableMeta := lookupJoin.Memo().Metadata().TableMeta(lookupJoin.Table)
166169
lookupTable := lookupTableMeta.Table
167170

171+
idx := lookupTable.Index(lookupJoin.Index)
172+
col := idx.Column(0)
173+
ord := col.Ordinal()
174+
colIDOfFirstLookupIndexColumn := lookupTableMeta.MetaID.ColumnID(ord)
175+
168176
if lookupJoin.LocalityOptimized || lookupJoin.ChildOfLocalityOptimizedSearch {
169177
provided.FromLocality(evalCtx.Locality)
170-
return provided
178+
return 0 /* firstLookupIndexCol */, provided
171179
} else if lookupTable.IsGlobalTable() {
172180
provided.FromLocality(evalCtx.Locality)
173-
return provided
181+
return 0 /* firstLookupIndexCol */, provided
174182
} else if homeRegion, ok := lookupTable.HomeRegion(); ok {
175183
provided.Regions = []string{homeRegion}
176-
return provided
184+
return 0 /* firstLookupIndexCol */, provided
177185
} else if lookupTable.IsRegionalByRow() {
178186
if len(lookupJoin.KeyCols) > 0 {
179187
inputExpr := lookupJoin.Input
@@ -182,46 +190,46 @@ func BuildLookupJoinLookupTableDistribution(
182190
if filterExpr, ok := invertedJoinExpr.GetConstExprFromFilter(firstKeyColID); ok {
183191
if homeRegion, ok = GetDEnumAsStringFromConstantExpr(filterExpr); ok {
184192
provided.Regions = []string{homeRegion}
185-
return provided
193+
return colIDOfFirstLookupIndexColumn, provided
186194
}
187195
}
188196
} else if projectExpr, ok := inputExpr.(*memo.ProjectExpr); ok {
189197
regionName := projectExpr.GetProjectedEnumConstant(firstKeyColID)
190198
if regionName != "" {
191199
provided.Regions = []string{regionName}
192-
return provided
200+
return colIDOfFirstLookupIndexColumn, provided
193201
}
194202
}
195203
if crdbRegionColID == firstKeyColID {
196204
provided.FromIndexScan(ctx, evalCtx, lookupTableMeta, lookupJoin.Index, nil)
197205
if !inputDistribution.Any() &&
198206
(provided.Any() || len(provided.Regions) > len(inputDistribution.Regions)) {
199-
return inputDistribution
207+
return colIDOfFirstLookupIndexColumn, inputDistribution
200208
}
201-
return provided
209+
return 0 /* firstLookupIndexCol */, provided
202210
}
203211
} else if len(lookupJoin.LookupJoinPrivate.LookupExpr) > 0 {
204212
if filterIdx, ok := lookupJoin.GetConstPrefixFilter(lookupJoin.Memo().Metadata()); ok {
205213
firstIndexColEqExpr := lookupJoin.LookupJoinPrivate.LookupExpr[filterIdx].Condition
206214
if firstIndexColEqExpr.Op() == opt.EqOp {
207215
if regionName, ok := GetDEnumAsStringFromConstantExpr(firstIndexColEqExpr.Child(1)); ok {
208216
provided.Regions = []string{regionName}
209-
return provided
217+
return colIDOfFirstLookupIndexColumn, provided
210218
}
211219
}
212220
} else if lookupJoin.ColIsEquivalentWithLookupIndexPrefix(lookupJoin.Memo().Metadata(), crdbRegionColID) {
213221
// We have a `crdb_region = crdb_region` term in `LookupJoinPrivate.LookupExpr`.
214222
provided.FromIndexScan(ctx, evalCtx, lookupTableMeta, lookupJoin.Index, nil)
215223
if !inputDistribution.Any() &&
216224
(provided.Any() || len(provided.Regions) > len(inputDistribution.Regions)) {
217-
return inputDistribution
225+
return colIDOfFirstLookupIndexColumn, inputDistribution
218226
}
219-
return provided
227+
return 0 /* firstLookupIndexCol */, provided
220228
}
221229
}
222230
}
223231
provided.FromIndexScan(ctx, evalCtx, lookupTableMeta, lookupJoin.Index, nil)
224-
return provided
232+
return 0 /* firstLookupIndexCol */, provided
225233
}
226234

227235
// BuildInvertedJoinLookupTableDistribution builds the Distribution that results

pkg/sql/opt/xform/coster.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,13 @@ func (c *coster) getCRBDRegionColFromInput(
10621062
return 0, physical.Distribution{}
10631063
}
10641064
}
1065+
if lookupJoinExpr, ok := maybeScan.(*memo.LookupJoinExpr); ok {
1066+
crdbRegionColID, inputDistribution = c.getCRBDRegionColFromInput(lookupJoinExpr, required)
1067+
crdbRegionColID, inputDistribution =
1068+
distribution.BuildLookupJoinLookupTableDistribution(
1069+
c.ctx, c.evalCtx, lookupJoinExpr, crdbRegionColID, inputDistribution)
1070+
return crdbRegionColID, inputDistribution
1071+
}
10651072
if localityOptimizedScan, ok := maybeScan.(*memo.LocalityOptimizedSearchExpr); ok {
10661073
maybeScan = localityOptimizedScan.Local
10671074
needRemap = true
@@ -1119,7 +1126,8 @@ func (c *coster) computeLookupJoinCost(
11191126
join.LocalityOptimized,
11201127
)
11211128
crdbRegionColID, inputDistribution := c.getCRBDRegionColFromInput(join, required)
1122-
provided := distribution.BuildLookupJoinLookupTableDistribution(c.ctx, c.evalCtx, join, crdbRegionColID, inputDistribution)
1129+
_, provided := distribution.BuildLookupJoinLookupTableDistribution(
1130+
c.ctx, c.evalCtx, join, crdbRegionColID, inputDistribution)
11231131
extraCost := c.distributionCost(provided)
11241132
cost += extraCost
11251133
return cost

pkg/sql/opt/xform/join_funcs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1890,7 +1890,7 @@ func (c *CustomFuncs) CanMaybeGenerateLocalityOptimizedSearchOfLookupJoins(
18901890
// are done in the local region.
18911891
func (c *CustomFuncs) LookupsAreLocal(lookupJoinExpr *memo.LookupJoinExpr) bool {
18921892
var inputDistribution physical.Distribution
1893-
provided := distribution.BuildLookupJoinLookupTableDistribution(c.e.ctx, c.e.f.EvalContext(), lookupJoinExpr, 0, inputDistribution)
1893+
_, provided := distribution.BuildLookupJoinLookupTableDistribution(c.e.ctx, c.e.f.EvalContext(), lookupJoinExpr, 0, inputDistribution)
18941894
if provided.Any() || len(provided.Regions) != 1 {
18951895
return false
18961896
}

0 commit comments

Comments
 (0)