@@ -13,6 +13,7 @@ namespace ErrorCodes
1313{
1414 extern const int ARGUMENT_OUT_OF_BOUND;
1515 extern const int CANNOT_READ_ALL_DATA;
16+ extern const int LOGICAL_ERROR;
1617}
1718
1819MergeTreeReaderStream::MergeTreeReaderStream (
@@ -41,14 +42,17 @@ MergeTreeReaderStream::MergeTreeReaderStream(
4142{
4243}
4344
/// Assigns `marks_getter` from `marks_loader->loadMarks()` when `marks_getter` is not yet set.
/// Does nothing when `marks_getter` is already set, so repeated calls do not reload it.
45+ void MergeTreeReaderStream::loadMarks ()
46+ {
47+ if (!marks_getter)
48+ marks_getter = marks_loader->loadMarks ();
49+ }
50+
4451void MergeTreeReaderStream::init ()
4552{
4653 if (initialized)
4754 return ;
4855
49- initialized = true ;
50- marks_getter = marks_loader->loadMarks ();
51-
5256 /// Compute the size of the buffer.
5357 auto [max_mark_range_bytes, sum_mark_range_bytes] = estimateMarkRangeBytes (all_mark_ranges);
5458
@@ -110,11 +114,15 @@ void MergeTreeReaderStream::init()
110114 data_buffer = non_cached_buffer.get ();
111115 compressed_data_buffer = non_cached_buffer.get ();
112116 }
117+
118+ initialized = true ;
113119}
114120
115121void MergeTreeReaderStream::seekToMarkAndColumn (size_t row_index, size_t column_position)
116122{
117123 init ();
124+ loadMarks ();
125+
118126 const auto & mark = marks_getter->getMark (row_index, column_position);
119127
120128 try
@@ -193,7 +201,7 @@ CompressedReadBufferBase * MergeTreeReaderStream::getCompressedDataBuffer()
193201 return compressed_data_buffer;
194202}
195203
196- size_t MergeTreeReaderStreamSingleColumn::getRightOffset (size_t right_mark) const
204+ size_t MergeTreeReaderStreamSingleColumn::getRightOffset (size_t right_mark)
197205{
198206 /// NOTE: if we are reading the whole file, then right_mark == marks_count
199207 /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks.
@@ -202,7 +210,8 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) cons
202210 if (marks_count == 0 )
203211 return 0 ;
204212
205- assert (right_mark <= marks_count);
213+ chassert (right_mark <= marks_count);
214+ loadMarks ();
206215
207216 if (right_mark == 0 )
208217 return marks_getter->getMark (right_mark, 0 ).offset_in_compressed_file ;
@@ -281,9 +290,9 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) cons
281290 return file_size;
282291}
283292
284- std::pair<size_t , size_t > MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes (const MarkRanges & mark_ranges) const
293+ std::pair<size_t , size_t > MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes (const MarkRanges & mark_ranges)
285294{
286- assert (marks_getter != nullptr );
295+ loadMarks ( );
287296
288297 size_t max_range_bytes = 0 ;
289298 size_t sum_range_bytes = 0 ;
@@ -302,7 +311,34 @@ std::pair<size_t, size_t> MergeTreeReaderStreamSingleColumn::estimateMarkRangeBy
302311 return {max_range_bytes, sum_range_bytes};
303312}
304313
305- size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn (size_t right_mark_non_included, size_t column_position) const
/// Returns `file_size` as the right offset; this override never touches `marks_getter`.
/// The only accepted argument is `right_mark == marks_count`.
/// Throws Exception with ErrorCodes::LOGICAL_ERROR for any other `right_mark`.
314+ size_t MergeTreeReaderStreamSingleColumnWholePart::getRightOffset (size_t right_mark)
315+ {
316+ if (right_mark != marks_count)
317+ {
318+ throw Exception (ErrorCodes::LOGICAL_ERROR,
319+ " Expected one right mark: {}, got: {}" ,
320+ marks_count, right_mark);
321+ }
322+ return file_size;
323+ }
324+
/// Returns `{file_size, file_size}`; this override never touches `marks_getter`.
/// The two elements are unpacked elsewhere in this file as
/// (max_mark_range_bytes, sum_mark_range_bytes), so both estimates equal `file_size`.
/// Throws Exception with ErrorCodes::LOGICAL_ERROR when
/// `mark_ranges.isOneRangeForWholePart(marks_count)` is false.
/// NOTE(review): judging by its name and the error text, that helper presumably checks for a
/// single range spanning all `marks_count` marks; it is defined outside this view, so confirm.
325+ std::pair<size_t , size_t > MergeTreeReaderStreamSingleColumnWholePart::estimateMarkRangeBytes (const MarkRanges & mark_ranges)
326+ {
327+ if (!mark_ranges.isOneRangeForWholePart (marks_count))
328+ {
329+ throw Exception (ErrorCodes::LOGICAL_ERROR,
330+ " Expected one mark range that covers the whole part, got: {}" ,
331+ mark_ranges.describe ());
332+ }
333+ return {file_size, file_size};
334+ }
335+
/// Seeking is not supported by this stream type: the (unnamed, unused) mark index is ignored
/// and the call always throws Exception with ErrorCodes::LOGICAL_ERROR.
336+ void MergeTreeReaderStreamSingleColumnWholePart::seekToMark (size_t )
337+ {
338+ throw Exception (ErrorCodes::LOGICAL_ERROR, " MergeTreeReaderStreamSingleColumnWholePart cannot seek to marks" );
339+ }
340+
341+ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn (size_t right_mark_non_included, size_t column_position)
306342{
307343 /// NOTE: if we are reading the whole file, then right_mark == marks_count
308344 /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks.
@@ -311,7 +347,8 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ
311347 if (marks_count == 0 )
312348 return 0 ;
313349
314- assert (right_mark_non_included <= marks_count);
350+ chassert (right_mark_non_included <= marks_count);
351+ loadMarks ();
315352
316353 if (right_mark_non_included == 0 )
317354 return marks_getter->getMark (right_mark_non_included, column_position).offset_in_compressed_file ;
@@ -347,9 +384,9 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ
347384}
348385
349386std::pair<size_t , size_t >
350- MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn (const MarkRanges & mark_ranges, size_t column_position) const
387+ MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn (const MarkRanges & mark_ranges, size_t column_position)
351388{
352- assert (marks_getter != nullptr );
389+ loadMarks ( );
353390
354391 /// As a maximal range we return the maximal size of a whole stripe.
355392 size_t max_range_bytes = 0 ;
@@ -386,8 +423,9 @@ MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const Mark
386423 return {max_range_bytes, sum_range_bytes};
387424}
388425
389- MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark (size_t row_index, size_t column_position) const
426+ MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark (size_t row_index, size_t column_position)
390427{
428+ loadMarks ();
391429 const auto & current_mark = marks_getter->getMark (row_index, column_position);
392430
393431 if (marks_getter->getNumColumns () == 1 )
@@ -434,27 +472,27 @@ MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeM
434472 return marks_getter->getMark (mark_index + 1 , column_position + 1 );
435473}
436474
/// Forwards to getRightOffsetOneColumn() with this stream's own `column_position`.
/// The two signature lines below are the removed (`-`, const) and added (`+`, non-const)
/// versions; the callee in this diff is likewise non-const and calls loadMarks().
437- size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset (size_t right_mark_non_included) const
475+ size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset (size_t right_mark_non_included)
438476{
439477 return getRightOffsetOneColumn (right_mark_non_included, column_position);
440478}
441479
/// Forwards to estimateMarkRangeBytesOneColumn() with this stream's own `column_position`
/// and returns its pair unchanged.
/// The two signature lines below are the removed (`-`, const) and added (`+`, non-const)
/// versions; the callee in this diff is likewise non-const and calls loadMarks().
442- std::pair<size_t , size_t > MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes (const MarkRanges & mark_ranges) const
480+ std::pair<size_t , size_t > MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes (const MarkRanges & mark_ranges)
443481{
444482 return estimateMarkRangeBytesOneColumn (mark_ranges, column_position);
445483}
446484
/// Forwards to getRightOffsetOneColumn() for column index `marks_loader->getNumColumns() - 1`,
/// i.e. the highest column index. The column count is read from `marks_loader`, not
/// `marks_getter`, so this line itself does not need marks to be loaded.
/// The two signature lines below are the removed (`-`, const) and added (`+`, non-const) versions.
447- size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset (size_t right_mark_non_included) const
485+ size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset (size_t right_mark_non_included)
448486{
449487 return getRightOffsetOneColumn (right_mark_non_included, marks_loader->getNumColumns () - 1 );
450488}
451489
452- std::pair<size_t , size_t > MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes (const MarkRanges & mark_ranges) const
490+ std::pair<size_t , size_t > MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes (const MarkRanges & mark_ranges)
453491{
454492 size_t max_range_bytes = 0 ;
455493 size_t sum_range_bytes = 0 ;
456494
457- for (size_t i = 0 ; i < marks_getter ->getNumColumns (); ++i)
495+ for (size_t i = 0 ; i < marks_loader ->getNumColumns (); ++i)
458496 {
459497 auto [current_max, current_sum] = estimateMarkRangeBytesOneColumn (mark_ranges, i);
460498
0 commit comments