@@ -308,6 +308,23 @@ class RleRunDecoder {
308308 return to_read;
309309 }
310310
311+ // / Get a batch of values and count how many equal match_value
312+ [[nodiscard]] rle_size_t GetBatchWithCount (value_type* out, rle_size_t batch_size,
313+ rle_size_t value_bit_width,
314+ value_type match_value, int64_t * out_count) {
315+ if (ARROW_PREDICT_FALSE (remaining_count_ == 0 )) {
316+ return 0 ;
317+ }
318+
319+ const auto to_read = std::min (remaining_count_, batch_size);
320+ std::fill (out, out + to_read, value_);
321+ if (value_ == match_value) {
322+ *out_count += to_read;
323+ }
324+ remaining_count_ -= to_read;
325+ return to_read;
326+ }
327+
311328 private:
312329 value_type value_ = {};
313330 rle_size_t remaining_count_ = 0 ;
@@ -377,6 +394,15 @@ class BitPackedRunDecoder {
377394 return steps;
378395 }
379396
397+ // / Get a batch of values and count how many equal match_value
398+ [[nodiscard]] rle_size_t GetBatchWithCount (value_type* out, rle_size_t batch_size,
399+ rle_size_t value_bit_width,
400+ value_type match_value, int64_t * out_count) {
401+ auto steps = GetBatch (out, batch_size, value_bit_width);
402+ *out_count += std::count (out, out + steps, match_value);
403+ return steps;
404+ }
405+
380406 private:
381407 // / The pointer to the beginning of the run
382408 const uint8_t * data_ = nullptr ;
@@ -438,6 +464,10 @@ class RleBitPackedDecoder {
438464 // / left or if an error occurred.
439465 [[nodiscard]] rle_size_t GetBatch (value_type* out, rle_size_t batch_size);
440466
467+ /// Get a batch of values and count how many of them equal match_value.
///
/// Up to batch_size decoded values are written to out. The number of decoded values
/// equal to match_value is added to *out_count; the counter is accumulated rather than
/// reset, so the caller must initialise it before the call.
///
/// Returns the number of values written to out, which may be smaller than batch_size
/// (for instance when a run decoder yields no values).
468+ [[nodiscard]] rle_size_t GetBatchWithCount (value_type* out, rle_size_t batch_size,
469+ value_type match_value, int64_t * out_count);
470+
441471 // / Like GetBatch but add spacing for null entries.
442472 // /
443473 // / Null entries will be set to an arbitrary value to avoid leaking private data.
@@ -483,6 +513,18 @@ class RleBitPackedDecoder {
483513 decoder_);
484514 }
485515
516+ // / Get a batch of values from the current run and return the number elements read.
517+ [[nodiscard]] rle_size_t RunGetBatchWithCount (value_type* out, rle_size_t batch_size,
518+ value_type match_value,
519+ int64_t * out_count) {
520+ return std::visit (
521+ [&](auto & dec) {
522+ return dec.GetBatchWithCount (out, batch_size, value_bit_width_, match_value,
523+ out_count);
524+ },
525+ decoder_);
526+ }
527+
486528 /// Call the parser with a single callable for all event types.
///
/// As used by GetBatchWithCount, func is a generic callable that receives a run by
/// value and returns a RleBitPackedParser::ControlFlow (Continue or Break).
/// NOTE(review): presumably Break stops parsing and Continue requests the next run;
/// confirm against the definition, which is not visible here.
487529 template <typename Callable>
488530 void ParseWithCallable (Callable&& func);
@@ -1474,4 +1516,49 @@ inline void RleBitPackedEncoder::Clear() {
14741516 bit_writer_.Clear ();
14751517}
14761518
1519+ template <typename T>
1520+ auto RleBitPackedDecoder<T>::GetBatchWithCount(value_type* out, rle_size_t batch_size,
1521+ value_type match_value, int64_t * out_count)
1522+ -> rle_size_t {
1523+ using ControlFlow = RleBitPackedParser::ControlFlow;
1524+
1525+ rle_size_t values_read = 0 ;
1526+
1527+ // Remaining from a previous call that would have left some unread data from a run.
1528+ if (ARROW_PREDICT_FALSE (run_remaining () > 0 )) {
1529+ const auto read = RunGetBatchWithCount (out, batch_size, match_value, out_count);
1530+ values_read += read;
1531+ out += read;
1532+
1533+ // Either we fulfilled all the batch to be read or we finished remaining run.
1534+ if (ARROW_PREDICT_FALSE (values_read == batch_size)) {
1535+ return values_read;
1536+ }
1537+ ARROW_DCHECK (run_remaining () == 0 );
1538+ }
1539+
1540+ ParseWithCallable ([&](auto run) {
1541+ using RunDecoder = typename decltype (run)::template DecoderType<value_type>;
1542+
1543+ ARROW_DCHECK_LT (values_read, batch_size);
1544+ RunDecoder decoder (run, value_bit_width_);
1545+ const auto read =
1546+ decoder.GetBatchWithCount (out, batch_size - values_read, value_bit_width_,
1547+ match_value, out_count);
1548+ ARROW_DCHECK_LE (read, batch_size - values_read);
1549+ values_read += read;
1550+ out += read;
1551+
1552+ // Stop reading and store remaining decoder
1553+ if (ARROW_PREDICT_FALSE (values_read == batch_size || read == 0 )) {
1554+ decoder_ = std::move (decoder);
1555+ return ControlFlow::Break;
1556+ }
1557+
1558+ return ControlFlow::Continue;
1559+ });
1560+
1561+ return values_read;
1562+ }
1563+
14771564} // namespace arrow::util
0 commit comments