sql: clarify fetcher parallelization

yuzefovich · yuzefovich · commit be8c8a37ebd2 · 2025-06-11T18:40:57.000-07:00
I find the "limit batches" terminology a bit confusing, so this commit
replaces it with "parallelize" wherever it is used. The new name indicates
that the DistSender-level cross-range parallelism should be used, which in
turn means that the TargetBytes limit cannot be set (i.e. we "should _not_
limit batches"). Note that the polarity is inverted: parallelize=true
corresponds to the old limitBatches=false.

Release note: None
diff --git a/pkg/sql/colfetcher/cfetcher.go b/pkg/sql/colfetcher/cfetcher.go
@@ -586,15 +586,15 @@ func cFetcherFirstBatchLimit(limitHint rowinfra.RowLimit, maxKeysPerRow uint32)
 func (cf *cFetcher) StartScan(
 	ctx context.Context,
 	spans roachpb.Spans,
-	limitBatches bool,
+	parallelize bool,
 	batchBytesLimit rowinfra.BytesLimit,
 	limitHint rowinfra.RowLimit,
 ) error {
 	if len(spans) == 0 {
 		return errors.AssertionFailedf("no spans")
 	}
-	if !limitBatches && batchBytesLimit != rowinfra.NoBytesLimit {
-		return errors.AssertionFailedf("batchBytesLimit set without limitBatches")
+	if parallelize && batchBytesLimit != rowinfra.NoBytesLimit {
+		return errors.AssertionFailedf("TargetBytes limit requested with parallelize=true")
 	}
 
 	firstBatchLimit := cFetcherFirstBatchLimit(limitHint, cf.table.spec.MaxKeysPerRow)
diff --git a/pkg/sql/colfetcher/colbatch_scan.go b/pkg/sql/colfetcher/colbatch_scan.go
@@ -221,11 +221,10 @@ func (s *ColBatchScan) Init(ctx context.Context) {
 		s.Ctx, s.flowCtx, "colbatchscan", s.processorID,
 		&s.ContentionEventsListener, &s.ScanStatsListener, &s.TenantConsumptionListener,
 	)
-	limitBatches := !s.parallelize
 	if err := s.cf.StartScan(
 		s.Ctx,
 		s.Spans,
-		limitBatches,
+		s.parallelize,
 		s.batchBytesLimit,
 		s.limitHint,
 	); err != nil {
diff --git a/pkg/sql/colfetcher/index_join.go b/pkg/sql/colfetcher/index_join.go
@@ -215,7 +215,7 @@ func (s *ColIndexJoin) Next() coldata.Batch {
 			if err := s.cf.StartScan(
 				s.Ctx,
 				spans,
-				false, /* limitBatches */
+				true, /* parallelize */
 				rowinfra.NoBytesLimit,
 				rowinfra.NoRowLimit,
 			); err != nil {
diff --git a/pkg/sql/rowexec/joinreader.go b/pkg/sql/rowexec/joinreader.go
@@ -115,11 +115,15 @@ type joinReader struct {
 
 	// fetcher wraps the row.Fetcher used to perform lookups. This enables the
 	// joinReader to wrap the fetcher with a stat collector when necessary.
-	fetcher            rowFetcher
-	alloc              tree.DatumAlloc
-	rowAlloc           rowenc.EncDatumRowAlloc
-	shouldLimitBatches bool
-	readerType         joinReaderType
+	fetcher  rowFetcher
+	alloc    tree.DatumAlloc
+	rowAlloc rowenc.EncDatumRowAlloc
+	// parallelize, if true, indicates that the KV lookups will be parallelized
+	// across ranges when using the DistSender API. It has no influence on the
+	// behavior when using the Streamer API (when the lookups are always
+	// parallelized).
+	parallelize bool
+	readerType  joinReaderType
 
 	// txn is the transaction used by the join reader.
 	txn *kv.Txn
@@ -326,18 +330,19 @@ func newJoinReader(
 	// in case of indexJoinReaderType, we know that there's exactly one lookup
 	// row for each input row. Similarly, in case of spec.LookupColumnsAreKey,
 	// we know that there's at most one lookup row per input row. In other
-	// cases, we use limits.
-	shouldLimitBatches := !spec.LookupColumnsAreKey && readerType == lookupJoinReaderType
+	// cases, we disable parallelism and use the TargetBytes limit.
+	parallelize := spec.LookupColumnsAreKey || readerType == indexJoinReaderType
 	if flowCtx.EvalCtx.SessionData().ParallelizeMultiKeyLookupJoinsEnabled {
-		shouldLimitBatches = false
+		parallelize = true
 	}
 	if spec.MaintainLookupOrdering {
-		// MaintainLookupOrdering indicates the output of the lookup joiner should
-		// be sorted by <inputCols>, <lookupCols>. It doesn't make sense for
-		// MaintainLookupOrdering to be true when MaintainOrdering is not.
-		// Additionally, we need to disable parallelism for the traditional fetcher
-		// in order to ensure the lookups are ordered, so set shouldLimitBatches.
-		spec.MaintainOrdering, shouldLimitBatches = true, true
+		// MaintainLookupOrdering indicates the output of the lookup joiner
+		// should be sorted by <inputCols>, <lookupCols>. It doesn't make sense
+		// for MaintainLookupOrdering to be true when MaintainOrdering is not.
+		//
+		// Additionally, we need to disable parallelism for the traditional
+		// fetcher in order to ensure the lookups are ordered.
+		spec.MaintainOrdering, parallelize = true, false
 	}
 	useStreamer, txn, err := flowCtx.UseStreamer(ctx)
 	if err != nil {
@@ -354,7 +359,7 @@ func newJoinReader(
 		input:                               input,
 		lookupCols:                          lookupCols,
 		outputGroupContinuationForLeftRow:   spec.OutputGroupContinuationForLeftRow,
-		shouldLimitBatches:                  shouldLimitBatches,
+		parallelize:                         parallelize,
 		readerType:                          readerType,
 		txn:                                 txn,
 		usesStreamer:                        useStreamer,
@@ -862,8 +867,8 @@ func (jr *joinReader) getBatchBytesLimit() rowinfra.BytesLimit {
 		// BatchRequests.
 		return rowinfra.NoBytesLimit
 	}
-	if !jr.shouldLimitBatches {
-		// We deem it safe to not limit the batches in order to get the
+	if jr.parallelize {
+		// We deem it safe to not use the TargetBytes limit in order to get the
 		// DistSender-level parallelism.
 		return rowinfra.NoBytesLimit
 	}
@@ -1047,11 +1052,13 @@ func (jr *joinReader) readInput() (
 	//    fetcher only accepts a limit if the spans are sorted), and
 	// b) Pebble has various optimizations for Seeks in sorted order.
 	if jr.readerType == indexJoinReaderType && jr.maintainOrdering {
-		// Assert that the index join doesn't have shouldLimitBatches set. Since we
-		// didn't sort above, the fetcher doesn't support a limit.
-		if jr.shouldLimitBatches {
+		// Assert that the index join has 'parallelize=true' set. Since we
+		// didn't sort above, the fetcher doesn't support the TargetBytes limit
+		// (which would be set via getBatchBytesLimit() if 'parallelize' was
+		// false).
+		if !jr.parallelize {
 			err := errors.AssertionFailedf("index join configured with both maintainOrdering and " +
-				"shouldLimitBatched; this shouldn't have happened as the implementation doesn't support it")
+				"parallelize=false; this shouldn't have happened as the implementation doesn't support it")
 			jr.MoveToDraining(err)
 			return jrStateUnknown, nil, jr.DrainHelper()
 		}
diff --git a/pkg/sql/rowexec/tablereader.go b/pkg/sql/rowexec/tablereader.go
@@ -199,17 +199,14 @@ func (tr *tableReader) startScan(ctx context.Context) error {
 	if cb := tr.FlowCtx.Cfg.TestingKnobs.TableReaderStartScanCb; cb != nil {
 		cb()
 	}
-	limitBatches := !tr.parallelize
-	var bytesLimit rowinfra.BytesLimit
-	if !limitBatches {
-		bytesLimit = rowinfra.NoBytesLimit
-	} else {
+	bytesLimit := rowinfra.NoBytesLimit
+	if !tr.parallelize {
 		bytesLimit = rowinfra.BytesLimit(tr.FlowCtx.Cfg.TestingKnobs.TableReaderBatchBytesLimit)
 		if bytesLimit == 0 {
 			bytesLimit = rowinfra.GetDefaultBatchBytesLimit(tr.FlowCtx.EvalCtx.TestingKnobs.ForceProductionValues)
 		}
 	}
-	log.VEventf(ctx, 1, "starting scan with limitBatches %t", limitBatches)
+	log.VEventf(ctx, 1, "starting scan with parallelize=%t", tr.parallelize)
 	var err error
 	if tr.maxTimestampAge == 0 {
 		err = tr.fetcher.StartScan(