Skip to content

Commit 287f136

Browse files
committed
Remove BitReader from BitPackedRunDecoder
1 parent 1efc260 commit 287f136

File tree

3 files changed

+45
-38
lines changed

3 files changed

+45
-38
lines changed

cpp/src/arrow/util/bit_stream_utils_internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
268268
}
269269

270270
if constexpr (std::is_same_v<T, bool>) {
271-
::arrow::internal::unpack(buffer_ + byte_offset_, reinterpret_cast<bool*>(v),
272-
batch_size, num_bits, bit_offset_);
271+
::arrow::internal::unpack(buffer_ + byte_offset_, v, batch_size, num_bits,
272+
bit_offset_);
273273

274274
} else {
275275
::arrow::internal::unpack(buffer_ + byte_offset_,

cpp/src/arrow/util/rle_encoding_internal.h

Lines changed: 37 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include "arrow/util/bit_run_reader.h"
3030
#include "arrow/util/bit_stream_utils_internal.h"
3131
#include "arrow/util/bit_util.h"
32+
#include "arrow/util/bpacking_internal.h"
33+
#include "arrow/util/logging.h"
3234
#include "arrow/util/macros.h"
3335

3436
namespace arrow::util {
@@ -278,10 +280,9 @@ class RleRunDecoder {
278280
/// Return the repeated value of this decoder.
279281
constexpr value_type value() const { return value_; }
280282

281-
/// Try to advance by as many values as provided.
283+
/// Advance by as many values as provided or until exhaustion of the decoder.
282284
/// Return the number of values skipped.
283-
/// May advance by less than asked for if there are not enough values left.
284-
[[nodiscard]] rle_size_t Advance(rle_size_t batch_size, rle_size_t value_bit_width) {
285+
[[nodiscard]] rle_size_t Advance(rle_size_t batch_size) {
285286
const auto steps = std::min(batch_size, remaining_count_);
286287
remaining_count_ -= steps;
287288
return steps;
@@ -331,52 +332,58 @@ class BitPackedRunDecoder {
331332
}
332333

333334
void Reset(const RunType& run, rle_size_t value_bit_width) noexcept {
334-
remaining_count_ = run.values_count();
335335
ARROW_DCHECK_GE(value_bit_width, 0);
336336
ARROW_DCHECK_LE(value_bit_width, 64);
337-
bit_reader_.Reset(run.raw_data_ptr(), run.raw_data_size(value_bit_width));
337+
data_ = run.raw_data_ptr();
338+
values_count_ = run.values_count();
339+
values_read_ = 0;
338340
}
339341

340342
/// Return the number of values that can be advanced.
341-
constexpr rle_size_t remaining() const { return remaining_count_; }
343+
constexpr rle_size_t remaining() const { return values_count_ - values_read_; }
342344

343-
/// Try to advance by as many values as provided.
344-
/// Return the number of values skipped or 0 if it fail to advance.
345-
/// May advance by less than asked for if there are not enough values left.
346-
[[nodiscard]] rle_size_t Advance(rle_size_t batch_size, rle_size_t value_bit_width) {
347-
const auto steps = std::min(batch_size, remaining_count_);
348-
if (bit_reader_.Advance(steps * value_bit_width)) {
349-
remaining_count_ -= steps;
350-
return steps;
351-
}
352-
return 0;
345+
/// Advance by as many values as provided or until exhaustion of the decoder.
346+
/// Return the number of values skipped.
347+
[[nodiscard]] rle_size_t Advance(rle_size_t batch_size) {
348+
const auto steps = std::min(batch_size, remaining());
349+
values_read_ += steps;
350+
return steps;
353351
}
354352

355-
/// Get the next value and return false if there are no more or an error occurred.
353+
/// Get the next value and return false if there are no more.
356354
[[nodiscard]] constexpr bool Get(value_type* out_value, rle_size_t value_bit_width) {
357355
return GetBatch(out_value, 1, value_bit_width) == 1;
358356
}
359357

360358
/// Get a batch of values return the number of decoded elements.
361359
/// May write fewer elements to the output than requested if there are not enough values
362-
/// left or if an error occurred.
360+
/// left.
363361
[[nodiscard]] rle_size_t GetBatch(value_type* out, rle_size_t batch_size,
364362
rle_size_t value_bit_width) {
365-
if (ARROW_PREDICT_FALSE(remaining_count_ == 0)) {
366-
return 0;
367-
}
363+
const auto steps = std::min(batch_size, remaining());
364+
const auto bits_read = values_read_ * value_bit_width;
365+
const auto* unread_data = data_ + bits_read / 8;
366+
const auto bit_offset = bits_read % 8;
368367

369-
const auto to_read = std::min(remaining_count_, batch_size);
370-
const auto actual_read = bit_reader_.GetBatch(value_bit_width, out, to_read);
371-
// There should not be any reason why the actual read would be different
372-
// but this is error resistant.
373-
remaining_count_ -= actual_read;
374-
return actual_read;
368+
if constexpr (std::is_same_v<T, bool>) {
369+
::arrow::internal::unpack(unread_data, out, steps, value_bit_width, bit_offset);
370+
371+
} else {
372+
::arrow::internal::unpack(unread_data,
373+
reinterpret_cast<std::make_unsigned_t<value_type>*>(out),
374+
steps, value_bit_width, bit_offset);
375+
}
376+
values_read_ += steps;
377+
return steps;
375378
}
376379

377380
private:
378-
::arrow::bit_util::BitReader bit_reader_ = {};
379-
rle_size_t remaining_count_ = 0;
381+
/// The pointer to the beginning of the run
382+
const uint8_t* data_ = nullptr;
383+
/// The total number of values in the run
384+
rle_size_t values_count_ = 0;
385+
/// The number of values read by the decoder
386+
rle_size_t values_read_ = 0;
380387

381388
static_assert(std::is_integral_v<value_type>,
382389
"This class is meant to decode positive integers");
@@ -895,7 +902,7 @@ auto RunGetSpaced(Converter* converter, typename Converter::out_type* out,
895902
return {0, 0};
896903
}
897904
converter->WriteRepeated(out, out + batch.total_read(), value);
898-
const auto actual_values_read = decoder->Advance(batch.values_read(), value_bit_width);
905+
const auto actual_values_read = decoder->Advance(batch.values_read());
899906
// We always cropped the number of values_read by the remaining values in the run.
900907
// What's more the RLE decoder should not encounter any errors.
901908
ARROW_DCHECK_EQ(actual_values_read, batch.values_read());

cpp/src/arrow/util/rle_encoding_test.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ void TestRleDecoder(std::vector<uint8_t> bytes, rle_size_t value_count,
290290
EXPECT_EQ(vals.at(0), expected_value);
291291
EXPECT_EQ(decoder.remaining(), value_count - read);
292292

293-
EXPECT_EQ(decoder.Advance(3, bit_width), 3);
293+
EXPECT_EQ(decoder.Advance(3), 3);
294294
read += 3;
295295
EXPECT_EQ(decoder.remaining(), value_count - read);
296296

@@ -302,9 +302,9 @@ void TestRleDecoder(std::vector<uint8_t> bytes, rle_size_t value_count,
302302
EXPECT_EQ(decoder.remaining(), value_count - read);
303303

304304
// Exhaust iteration
305-
EXPECT_EQ(decoder.Advance(value_count - read, bit_width), value_count - read);
305+
EXPECT_EQ(decoder.Advance(value_count - read), value_count - read);
306306
EXPECT_EQ(decoder.remaining(), 0);
307-
EXPECT_EQ(decoder.Advance(1, bit_width), 0);
307+
EXPECT_EQ(decoder.Advance(1), 0);
308308
vals = {0, 0};
309309
EXPECT_EQ(decoder.Get(vals.data(), bit_width), 0);
310310
EXPECT_EQ(vals.at(0), 0);
@@ -350,7 +350,7 @@ void TestBitPackedDecoder(std::vector<uint8_t> bytes, rle_size_t value_count,
350350
read += 1;
351351
EXPECT_EQ(decoder.remaining(), value_count - read);
352352

353-
EXPECT_EQ(decoder.Advance(3, bit_width), 3);
353+
EXPECT_EQ(decoder.Advance(3), 3);
354354
read += 3;
355355
EXPECT_EQ(decoder.remaining(), value_count - read);
356356

@@ -362,9 +362,9 @@ void TestBitPackedDecoder(std::vector<uint8_t> bytes, rle_size_t value_count,
362362
EXPECT_EQ(decoder.remaining(), value_count - read);
363363

364364
// Exhaust iteration
365-
EXPECT_EQ(decoder.Advance(value_count - read, bit_width), value_count - read);
365+
EXPECT_EQ(decoder.Advance(value_count - read), value_count - read);
366366
EXPECT_EQ(decoder.remaining(), 0);
367-
EXPECT_EQ(decoder.Advance(1, bit_width), 0);
367+
EXPECT_EQ(decoder.Advance(1), 0);
368368
vals = {0, 0};
369369
EXPECT_EQ(decoder.Get(vals.data(), bit_width), 0);
370370
EXPECT_EQ(vals.at(0), 0);

0 commit comments

Comments
 (0)