@@ -426,6 +426,14 @@ class RleBitPackedDecoder {
426426 // / values.
427427 [[nodiscard]] bool Get (value_type* val);
428428
429+ /// Get the next logical value together with the number of times it repeats.
///
/// The repeat count is capped by batch_size. A value coming from a bit-packed run is
/// always reported with a repeat count of 1; a value coming from an RLE run is reported
/// with at most min(values left in that run, batch_size) repeats.
///
/// \param[out] val receives the decoded value.
/// \param[out] num_repeats receives the repeat count; only meaningful when the call
///   returns true.
/// \param[in] batch_size upper bound on the reported repeat count.
/// \return false if no value could be read, true otherwise.
430+ [[nodiscard]] bool GetNextValueAndNumRepeats (value_type* val, int * num_repeats, int batch_size);
431+
432+ /// Like GetNextValueAndNumRepeats but add spacing for null entries.
///
/// The first non-empty run of the validity bitmap (batch_size bits starting at
/// valid_bits_offset) decides the outcome:
/// - a run of null entries sets *is_null to true, stores the run length in *num_repeats
///   and returns true without touching *val;
/// - a run of valid entries forwards to GetNextValueAndNumRepeats with the batch limited
///   to the length of that run, and returns its result.
///
/// batch_size must be strictly positive.
///
/// NOTE(review): callers should not rely on *is_null being written when the run is
/// valid; initialize it to false before the call.
433+ [[nodiscard]] bool GetNextValueAndNumRepeatsSpaced (value_type* val, bool * is_null,
434+ int * num_repeats, int batch_size,
435+ const uint8_t * valid_bits, int64_t valid_bits_offset);
436+
429437 /// Get a batch of values and return the number of decoded elements.
430438 // / May write fewer elements to the output than requested if there are not enough values
431439 // / left or if an error occurred.
@@ -722,7 +730,6 @@ void RleBitPackedDecoder<T>::ParseWithCallable(Callable&& func) {
722730 auto OnBitPackedRun (BitPackedRun run) { return func (std::move (run)); }
723731 auto OnRleRun (RleRun run) { return func (std::move (run)); }
724732 } handler{std::move (func)};
725-
726733 parser_.Parse (std::move (handler));
727734}
728735
@@ -731,6 +738,69 @@ bool RleBitPackedDecoder<T>::Get(value_type* val) {
731738 return GetBatch (val, 1 ) == 1 ;
732739}
733740
741+ template <typename T>
742+ bool RleBitPackedDecoder<T>::GetNextValueAndNumRepeats(value_type* val, int * num_repeats, int batch_size) {
743+ using ControlFlow = RleBitPackedParser::ControlFlow;
744+
745+ if (ARROW_PREDICT_FALSE (run_remaining () > 0 )) {
746+ if (std::holds_alternative<BitPackedRunDecoder<value_type>>(decoder_)) {
747+ auto & decoder = std::get<BitPackedRunDecoder<value_type>>(decoder_);
748+ *num_repeats = 1 ;
749+ return decoder.Get (val, value_bit_width_);
750+ } else {
751+ auto & decoder = std::get<RleRunDecoder<value_type>>(decoder_);
752+ *num_repeats = std::min (decoder.remaining (), batch_size);
753+ ARROW_DCHECK_EQ (decoder.Advance (*num_repeats, value_bit_width_), *num_repeats);
754+ return decoder.Get (val, value_bit_width_);
755+ }
756+ }
757+
758+ bool read_new_value = false ;
759+
760+ ParseWithCallable ([&](auto run) {
761+ if constexpr (std::is_same_v<decltype (run), BitPackedRun>) {
762+ BitPackedRunDecoder<T> decoder (run, value_bit_width_);
763+ read_new_value = decoder.Get (val, value_bit_width_);
764+ *num_repeats = 1 ;
765+ decoder_ = std::move (decoder);
766+ return ControlFlow::Break;
767+ }
768+ else {
769+ RleRunDecoder<T> decoder (run, value_bit_width_);
770+ *num_repeats = std::min (decoder.remaining (), batch_size);
771+ read_new_value = decoder.Get (val, value_bit_width_);
772+ ARROW_DCHECK_EQ (decoder.Advance (*num_repeats, value_bit_width_), *num_repeats);
773+ decoder_ = std::move (decoder);
774+ return ControlFlow::Break;
775+ }
776+ });
777+
778+ return read_new_value;
779+ }
780+
781+ template <typename T>
782+ bool RleBitPackedDecoder<T>::GetNextValueAndNumRepeatsSpaced(value_type* val, bool * is_null,
783+ int * num_repeats, int batch_size,
784+ const uint8_t * valid_bits, int64_t valid_bits_offset) {
785+ arrow::internal::BitRunReader bit_reader (valid_bits, valid_bits_offset,
786+ /* length=*/ batch_size);
787+ arrow::internal::BitRun valid_run = bit_reader.NextRun ();
788+ while (ARROW_PREDICT_FALSE (valid_run.length == 0 )) {
789+ valid_run = bit_reader.NextRun ();
790+ }
791+ ARROW_DCHECK_GT (batch_size, 0 );
792+ ARROW_DCHECK_GT (valid_run.length , 0 );
793+ if (valid_run.set ) {
794+ return GetNextValueAndNumRepeats (
795+ val, num_repeats,
796+ static_cast <int >(std::min (valid_run.length , static_cast <int64_t >(batch_size))));
797+ } else {
798+ *is_null = true ;
799+ *num_repeats = static_cast <int >(valid_run.length );
800+ }
801+ return true ;
802+ }
803+
734804template <typename T>
735805auto RleBitPackedDecoder<T>::GetBatch(value_type* out, rle_size_t batch_size)
736806 -> rle_size_t {
0 commit comments