@@ -115,11 +115,15 @@ type joinReader struct {
115
115
116
116
// fetcher wraps the row.Fetcher used to perform lookups. This enables the
117
117
// joinReader to wrap the fetcher with a stat collector when necessary.
118
- fetcher rowFetcher
119
- alloc tree.DatumAlloc
120
- rowAlloc rowenc.EncDatumRowAlloc
121
- shouldLimitBatches bool
122
- readerType joinReaderType
118
+ fetcher rowFetcher
119
+ alloc tree.DatumAlloc
120
+ rowAlloc rowenc.EncDatumRowAlloc
121
+ // parallelize, if true, indicates that the KV lookups will be parallelized
122
+ // across ranges when using the DistSender API. It has no influence on the
123
+ // behavior when using the Streamer API (when the lookups are always
124
+ // parallelized).
125
+ parallelize bool
126
+ readerType joinReaderType
123
127
124
128
// txn is the transaction used by the join reader.
125
129
txn * kv.Txn
@@ -238,11 +242,6 @@ type joinReader struct {
238
242
// and requires that the spec has MaintainOrdering set to true.
239
243
outputGroupContinuationForLeftRow bool
240
244
241
- // lookupBatchBytesLimit controls the TargetBytes of lookup requests. If 0, a
242
- // default will be used. Regardless of this value, bytes limits aren't always
243
- // used.
244
- lookupBatchBytesLimit rowinfra.BytesLimit
245
-
246
245
// limitHintHelper is used in limiting batches of input rows in the presence
247
246
// of hard and soft limits.
248
247
limitHintHelper execinfra.LimitHintHelper
@@ -331,18 +330,19 @@ func newJoinReader(
331
330
// in case of indexJoinReaderType, we know that there's exactly one lookup
332
331
// row for each input row. Similarly, in case of spec.LookupColumnsAreKey,
333
332
// we know that there's at most one lookup row per input row. In other
334
- // cases, we use limits .
335
- shouldLimitBatches := ! spec .LookupColumnsAreKey && readerType == lookupJoinReaderType
333
+ // cases, we disable parallelism and use the TargetBytes limit.
334
+ parallelize := spec .LookupColumnsAreKey || readerType == indexJoinReaderType
336
335
if flowCtx .EvalCtx .SessionData ().ParallelizeMultiKeyLookupJoinsEnabled {
337
- shouldLimitBatches = false
336
+ parallelize = true
338
337
}
339
338
if spec .MaintainLookupOrdering {
340
- // MaintainLookupOrdering indicates the output of the lookup joiner should
341
- // be sorted by <inputCols>, <lookupCols>. It doesn't make sense for
342
- // MaintainLookupOrdering to be true when MaintainOrdering is not.
343
- // Additionally, we need to disable parallelism for the traditional fetcher
344
- // in order to ensure the lookups are ordered, so set shouldLimitBatches.
345
- spec .MaintainOrdering , shouldLimitBatches = true , true
339
+ // MaintainLookupOrdering indicates the output of the lookup joiner
340
+ // should be sorted by <inputCols>, <lookupCols>. It doesn't make sense
341
+ // for MaintainLookupOrdering to be true when MaintainOrdering is not.
342
+ //
343
+ // Additionally, we need to disable parallelism for the traditional
344
+ // fetcher in order to ensure the lookups are ordered.
345
+ spec .MaintainOrdering , parallelize = true , false
346
346
}
347
347
useStreamer , txn , err := flowCtx .UseStreamer (ctx )
348
348
if err != nil {
@@ -359,11 +359,10 @@ func newJoinReader(
359
359
input : input ,
360
360
lookupCols : lookupCols ,
361
361
outputGroupContinuationForLeftRow : spec .OutputGroupContinuationForLeftRow ,
362
- shouldLimitBatches : shouldLimitBatches ,
362
+ parallelize : parallelize ,
363
363
readerType : readerType ,
364
364
txn : txn ,
365
365
usesStreamer : useStreamer ,
366
- lookupBatchBytesLimit : rowinfra .BytesLimit (spec .LookupBatchBytesLimit ),
367
366
limitHintHelper : execinfra .MakeLimitHintHelper (spec .LimitHint , post ),
368
367
errorOnLookup : errorOnLookup ,
369
368
allowEnforceHomeRegionFollowerReads : flowCtx .EvalCtx .SessionData ().EnforceHomeRegionFollowerReadsEnabled ,
@@ -868,16 +867,15 @@ func (jr *joinReader) getBatchBytesLimit() rowinfra.BytesLimit {
868
867
// BatchRequests.
869
868
return rowinfra .NoBytesLimit
870
869
}
871
- if ! jr .shouldLimitBatches {
872
- // We deem it safe to not limit the batches in order to get the
870
+ if jr .parallelize {
871
+ // We deem it safe to not use the TargetBytes limit in order to get the
873
872
// DistSender-level parallelism.
874
873
return rowinfra .NoBytesLimit
875
874
}
876
- bytesLimit := jr .lookupBatchBytesLimit
877
- if bytesLimit == 0 {
878
- bytesLimit = rowinfra .GetDefaultBatchBytesLimit (jr .FlowCtx .EvalCtx .TestingKnobs .ForceProductionValues )
875
+ if testingLimit := jr .FlowCtx .Cfg .TestingKnobs .JoinReaderBatchBytesLimit ; testingLimit != 0 {
876
+ return rowinfra .BytesLimit (testingLimit )
879
877
}
880
- return bytesLimit
878
+ return rowinfra . GetDefaultBatchBytesLimit ( jr . FlowCtx . EvalCtx . TestingKnobs . ForceProductionValues )
881
879
}
882
880
883
881
// readInput reads the next batch of input rows and starts an index scan, which
@@ -1054,11 +1052,13 @@ func (jr *joinReader) readInput() (
1054
1052
// fetcher only accepts a limit if the spans are sorted), and
1055
1053
// b) Pebble has various optimizations for Seeks in sorted order.
1056
1054
if jr .readerType == indexJoinReaderType && jr .maintainOrdering {
1057
- // Assert that the index join doesn't have shouldLimitBatches set. Since we
1058
- // didn't sort above, the fetcher doesn't support a limit.
1059
- if jr .shouldLimitBatches {
1055
+ // Assert that the index join has 'parallelize=true' set. Since we
1056
+ // didn't sort above, the fetcher doesn't support the TargetBytes limit
1057
+ // (which would be set via getBatchBytesLimit() if 'parallelize' was
1058
+ // false).
1059
+ if ! jr .parallelize {
1060
1060
err := errors .AssertionFailedf ("index join configured with both maintainOrdering and " +
1061
- "shouldLimitBatched ; this shouldn't have happened as the implementation doesn't support it" )
1061
+ "parallelize=false ; this shouldn't have happened as the implementation doesn't support it" )
1062
1062
jr .MoveToDraining (err )
1063
1063
return jrStateUnknown , nil , jr .DrainHelper ()
1064
1064
}
0 commit comments