@@ -34,7 +34,7 @@ using namespace vectorized;
3434
3535Status ParallelScannerBuilder::build_scanners (std::list<ScannerSPtr>& scanners) {
3636 RETURN_IF_ERROR (_load ());
37- if (_optimize_index_scan_parallelism ) {
37+ if (_scan_parallelism_by_segment ) {
3838 return _build_scanners_by_segment (scanners);
3939 } else if (_is_dup_mow_key) {
4040 // Default strategy for DUP/MOW tables: split by rowids within segments
@@ -87,7 +87,7 @@ Status ParallelScannerBuilder::_build_scanners_by_rowid(std::list<ScannerSPtr>&
8787 auto rows_need = _rows_per_scanner - rows_collected;
8888
8989 // 0.9: try to avoid splitting the segments into excessively small parts.
90- if (rows_need >= remaining_rows * 0.9 ) {
90+ if (rows_need >= remaining_rows * 9 / 10 ) {
9191 rows_need = remaining_rows;
9292 }
9393 DCHECK_LE (rows_need, remaining_rows);
@@ -167,6 +167,8 @@ Status ParallelScannerBuilder::_build_scanners_by_rowid(std::list<ScannerSPtr>&
167167// for the involved tablets. It preserves delete predicates and key ranges, and clones
168168// RowsetReader per scanner to avoid sharing between scanners.
169169Status ParallelScannerBuilder::_build_scanners_by_segment (std::list<ScannerSPtr>& scanners) {
170+ DCHECK_GE (_rows_per_scanner, _min_rows_per_scanner);
171+
170172 for (auto && [tablet, version] : _tablets) {
171173 DCHECK (_all_read_sources.contains (tablet->tablet_id ()));
172174 auto & entire_read_source = _all_read_sources[tablet->tablet_id ()];
@@ -176,8 +178,10 @@ Status ParallelScannerBuilder::_build_scanners_by_segment(std::list<ScannerSPtr>
176178 ExecEnv::GetInstance ()->storage_engine ().to_cloud ().tablet_hotspot ().count (*tablet);
177179 }
178180
179- // For each RowSet split in the read source, split by segment id and build
180- // one scanner per segment. Keep delete predicates shared.
181+ // Collect segments into scanners by accumulated row count, instead of creating one scanner per segment.
182+ TabletReader::ReadSource partitial_read_source;
183+ int64_t rows_collected = 0 ;
184+
181185 for (auto & rs_split : entire_read_source.rs_splits ) {
182186 auto reader = rs_split.rs_reader ;
183187 auto rowset = reader->rowset ();
@@ -188,21 +192,62 @@ Status ParallelScannerBuilder::_build_scanners_by_segment(std::list<ScannerSPtr>
188192 continue ;
189193 }
190194
191- // Build scanners for [i, i+1) segment range, without row-range slicing.
192- for (int64_t i = 0 ; i < rowset->num_segments (); ++i) {
193- RowSetSplits split (reader->clone ());
194- split.segment_offsets .first = i;
195- split.segment_offsets .second = i + 1 ;
196- // No row-ranges slicing; scan whole segment i.
197- DCHECK_GE (split.segment_offsets .second , split.segment_offsets .first + 1 );
195+ int64_t segment_start = 0 ;
196+ auto split = RowSetSplits (reader->clone ());
198197
199- TabletReader::ReadSource partitial_read_source;
198+ for (size_t i = 0 ; i < segments_rows.size (); ++i) {
199+ const size_t rows_of_segment = segments_rows[i];
200+
201+ // Check if adding this segment would exceed rows_per_scanner
202+ // 0.9: try to avoid splitting the segments into excessively small parts.
203+ if (rows_collected > 0 && (rows_collected + rows_of_segment > _rows_per_scanner &&
204+ rows_collected < _rows_per_scanner * 9 / 10 )) {
205+ // Create a new scanner with collected segments
206+ split.segment_offsets .first = segment_start;
207+ split.segment_offsets .second =
208+ i; // Range is [segment_start, i), including all segments from segment_start to i-1
209+
210+ DCHECK_GT (split.segment_offsets .second , split.segment_offsets .first );
211+
212+ partitial_read_source.rs_splits .emplace_back (std::move (split));
213+
214+ scanners.emplace_back (
215+ _build_scanner (tablet, version, _key_ranges,
216+ {std::move (partitial_read_source.rs_splits ),
217+ entire_read_source.delete_predicates }));
218+
219+ // Reset for next scanner
220+ partitial_read_source = TabletReader::ReadSource ();
221+ split = RowSetSplits (reader->clone ());
222+ segment_start = i;
223+ rows_collected = 0 ;
224+ }
225+
226+ // Add current segment to the current scanner
227+ rows_collected += rows_of_segment;
228+ }
229+
230+ // Add remaining segments in this rowset to a scanner
231+ if (rows_collected > 0 ) {
232+ split.segment_offsets .first = segment_start;
233+ split.segment_offsets .second = segments_rows.size ();
234+ DCHECK_GT (split.segment_offsets .second , split.segment_offsets .first );
200235 partitial_read_source.rs_splits .emplace_back (std::move (split));
236+ }
237+ }
201238
202- scanners.emplace_back (_build_scanner (tablet, version, _key_ranges,
203- {std::move (partitial_read_source.rs_splits ),
204- entire_read_source.delete_predicates }));
239+ // Add remaining segments across all rowsets to a scanner
240+ if (rows_collected > 0 ) {
241+ DCHECK_GT (partitial_read_source.rs_splits .size (), 0 );
242+ #ifndef NDEBUG
243+ for (auto & split : partitial_read_source.rs_splits ) {
244+ DCHECK (split.rs_reader != nullptr );
245+ DCHECK_LT (split.segment_offsets .first , split.segment_offsets .second );
205246 }
247+ #endif
248+ scanners.emplace_back (_build_scanner (tablet, version, _key_ranges,
249+ {std::move (partitial_read_source.rs_splits ),
250+ entire_read_source.delete_predicates }));
206251 }
207252 }
208253
0 commit comments