@@ -732,6 +732,7 @@ class NullValueReducer {
732
732
std::shared_ptr<PipelineContext> context_;
733
733
SegmentInMemory frame_;
734
734
size_t pos_;
735
+ size_t column_block_idx_;
735
736
DecodePathData shared_data_;
736
737
std::any& handler_data_;
737
738
const OutputFormat output_format_;
@@ -751,6 +752,7 @@ class NullValueReducer {
751
752
context_ (context),
752
753
frame_ (std::move(frame)),
753
754
pos_ (frame_.offset()),
755
+ column_block_idx_ (0 ),
754
756
shared_data_ (std::move(shared_data)),
755
757
handler_data_ (handler_data),
756
758
output_format_ (output_format),
@@ -761,18 +763,16 @@ class NullValueReducer {
761
763
return context_row.slice_and_key ().slice_ .row_range .first ;
762
764
}
763
765
764
- void backfill_all_zero_validity_bitmaps (size_t offset_bytes_start, size_t offset_bytes_end_idx) {
765
- // Explanation: offset_bytes_start and offset_bytes_end should both be elements of block_offsets by
766
- // construction. We must add an all zeros validity bitmap for each row-slice read from storage where this
767
- // column was missing, in order to correctly populate the Arrow record-batches for the output
766
+ void backfill_all_zero_validity_bitmaps_up_to (std::optional<size_t > up_to_block_offset) {
767
+ // Fills up all validity bitmaps with zeros from `column_block_idx_` until reaching `up_to_block_offset`.
768
+ // If `up_to_block_offset` is `std::nullopt` then fills up until the end of the column.
768
769
const auto & block_offsets = column_.block_offsets ();
769
- auto start_it = std::ranges::lower_bound (block_offsets, offset_bytes_start);
770
- util::check (start_it != block_offsets.cend () && *start_it == offset_bytes_start,
771
- " NullValueReducer: Failed to find offset_bytes_start {} in block_offsets {}" ,
772
- offset_bytes_start, block_offsets);
773
- for (auto idx = static_cast <size_t >(std::distance (block_offsets.begin (), start_it)); idx < offset_bytes_end_idx; ++idx) {
774
- auto rows = (block_offsets.at (idx + 1 ) - block_offsets.at (idx)) / type_bytes_;
775
- create_dense_bitmap_all_zeros (block_offsets.at (idx), rows, column_, AllocationType::DETACHABLE);
770
+ for (; column_block_idx_ < block_offsets.size () - 1 ; ++column_block_idx_) {
771
+ if (up_to_block_offset.has_value () && block_offsets.at (column_block_idx_) >= up_to_block_offset.value ()) {
772
+ break ;
773
+ }
774
+ auto rows = (block_offsets.at (column_block_idx_ + 1 ) - block_offsets.at (column_block_idx_)) / type_bytes_;
775
+ create_dense_bitmap_all_zeros (block_offsets.at (column_block_idx_), rows, column_, AllocationType::DETACHABLE);
776
776
}
777
777
}
778
778
@@ -783,18 +783,20 @@ class NullValueReducer {
783
783
if (current_pos != pos_) {
784
784
const auto num_rows = current_pos - pos_;
785
785
const auto start_row = pos_ - frame_.offset ();
786
+ const auto end_row = current_pos - frame_.offset ();
786
787
if (const std::shared_ptr<TypeHandler>& handler = get_type_handler (output_format_, column_.type ()); handler) {
787
788
handler->default_initialize (column_.buffer (), start_row * handler->type_size (), num_rows * handler->type_size (), shared_data_, handler_data_);
788
789
} else if (output_format_ != OutputFormat::ARROW) {
789
790
// Arrow does not care what values are in the main buffer where the validity bitmap is zero
790
791
column_.default_initialize_rows (start_row, num_rows, false , default_value_);
791
792
}
792
793
if (output_format_ == OutputFormat::ARROW) {
793
- backfill_all_zero_validity_bitmaps (start_row * type_bytes_, context_row. index () );
794
+ backfill_all_zero_validity_bitmaps_up_to (end_row * type_bytes_);
794
795
}
795
- pos_ = current_pos + sz_to_advance;
796
- } else {
797
- pos_ += sz_to_advance;
796
+ }
797
+ pos_ = current_pos + sz_to_advance;
798
+ if (output_format_ == OutputFormat::ARROW) {
799
+ ++column_block_idx_;
798
800
}
799
801
}
800
802
@@ -812,7 +814,7 @@ class NullValueReducer {
812
814
column_.default_initialize_rows (start_row, num_rows, false , default_value_);
813
815
}
814
816
if (output_format_ == OutputFormat::ARROW) {
815
- backfill_all_zero_validity_bitmaps (start_row * type_bytes_, column_. block_offsets (). size () - 1 );
817
+ backfill_all_zero_validity_bitmaps_up_to (std:: nullopt );
816
818
}
817
819
}
818
820
}
0 commit comments