Skip to content

Commit 3550957

Browse files
committed
EXP: [C++][Parquet] Simplify GetVlqInt
1 parent 55587ef commit 3550957

File tree

1 file changed

+21
-21
lines changed

1 file changed

+21
-21
lines changed

cpp/src/arrow/util/bit_stream_utils_internal.h

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,10 @@ class BitReader {
160160
/// are not enough bits left.
161161
bool Advance(int64_t num_bits);
162162

163+
/// Advance the stream by a number of bytes, ignoring remaning bits.
164+
/// Returns true if succeed or false if there are not enough bits left.
165+
bool AdvanceBytes(int64_t num_bytes);
166+
163167
/// Reads a vlq encoded int from the stream. The encoded int must start at
164168
/// the beginning of a byte. Return false if there were not enough bytes in
165169
/// the buffer.
@@ -328,6 +332,17 @@ inline bool BitReader::Advance(int64_t num_bits) {
328332
return true;
329333
}
330334

335+
inline bool BitReader::AdvanceBytes(int64_t num_bytes) {
336+
if (ARROW_PREDICT_FALSE(num_bytes > max_bytes_ - byte_offset_)) {
337+
return false;
338+
}
339+
byte_offset_ += num_bytes;
340+
bit_offset_ = 0;
341+
buffered_values_ =
342+
detail::ReadLittleEndianWord(buffer_ + byte_offset_, max_bytes_ - byte_offset_);
343+
return true;
344+
}
345+
331346
template <typename Int>
332347
inline bool BitWriter::PutVlqInt(Int v) {
333348
static_assert(std::is_integral_v<Int>);
@@ -362,34 +377,19 @@ inline bool BitReader::GetVlqInt(Int* v) {
362377
static_assert(std::is_integral_v<Int>);
363378

364379
// The data that we will pass to the LEB128 parser
365-
// In all case, we read a byte-aligned value, skipping remaining bits
366-
const uint8_t* data = NULLPTR;
367-
int max_size = 0;
368-
369-
// Number of bytes left in the buffered values, not including the current
370-
// byte (i.e., there may be an additional fraction of a byte).
371-
const int bytes_left_in_cache =
372-
sizeof(buffered_values_) - static_cast<int>(bit_util::BytesForBits(bit_offset_));
373-
374-
// If there are clearly enough bytes left we can try to parse from the cache
375-
if (bytes_left_in_cache >= kMaxLEB128ByteLenFor<Int>) {
376-
max_size = bytes_left_in_cache;
377-
data = reinterpret_cast<const uint8_t*>(&buffered_values_) +
378-
bit_util::BytesForBits(bit_offset_);
379-
// Otherwise, we try straight from buffer (ignoring few bytes that may be cached)
380-
} else {
381-
max_size = bytes_left();
382-
data = buffer_ + (max_bytes_ - max_size);
383-
}
380+
// We read a byte-aligned value, skipping remaining bits.
381+
// Also, we don't bother with the cache since the decoding would be the same.
382+
int max_size = bytes_left();
383+
const uint8_t* data = buffer_ + (max_bytes_ - max_size);
384384

385385
const auto bytes_read = bit_util::ParseLeadingLEB128(data, max_size, v);
386386
if (ARROW_PREDICT_FALSE(bytes_read == 0)) {
387387
// Corrupt LEB128
388388
return false;
389389
}
390390

391-
// Advance for the bytes we have read + the bits we skipped
392-
return Advance((8 * bytes_read) + (bit_offset_ % 8));
391+
// Advance for the bytes we have read
392+
return AdvanceBytes(bytes_read);
393393
}
394394

395395
template <typename Int>

0 commit comments

Comments
 (0)