@@ -160,6 +160,10 @@ class BitReader {
160160 // / are not enough bits left.
161161 bool Advance (int64_t num_bits);
162162
163+ // / Advance the stream by a number of bytes, ignoring remaning bits.
164+ // / Returns true if succeed or false if there are not enough bits left.
165+ bool AdvanceBytes (int64_t num_bytes);
166+
163167 // / Reads a vlq encoded int from the stream. The encoded int must start at
164168 // / the beginning of a byte. Return false if there were not enough bytes in
165169 // / the buffer.
@@ -328,6 +332,17 @@ inline bool BitReader::Advance(int64_t num_bits) {
328332 return true ;
329333}
330334
335+ inline bool BitReader::AdvanceBytes (int64_t num_bytes) {
336+ if (ARROW_PREDICT_FALSE (num_bytes > max_bytes_ - byte_offset_)) {
337+ return false ;
338+ }
339+ byte_offset_ += num_bytes;
340+ bit_offset_ = 0 ;
341+ buffered_values_ =
342+ detail::ReadLittleEndianWord (buffer_ + byte_offset_, max_bytes_ - byte_offset_);
343+ return true ;
344+ }
345+
331346template <typename Int>
332347inline bool BitWriter::PutVlqInt (Int v) {
333348 static_assert (std::is_integral_v<Int>);
@@ -362,34 +377,19 @@ inline bool BitReader::GetVlqInt(Int* v) {
362377 static_assert (std::is_integral_v<Int>);
363378
364379 // The data that we will pass to the LEB128 parser
365- // In all case, we read a byte-aligned value, skipping remaining bits
366- const uint8_t * data = NULLPTR;
367- int max_size = 0 ;
368-
369- // Number of bytes left in the buffered values, not including the current
370- // byte (i.e., there may be an additional fraction of a byte).
371- const int bytes_left_in_cache =
372- sizeof (buffered_values_) - static_cast <int >(bit_util::BytesForBits (bit_offset_));
373-
374- // If there are clearly enough bytes left we can try to parse from the cache
375- if (bytes_left_in_cache >= kMaxLEB128ByteLenFor <Int>) {
376- max_size = bytes_left_in_cache;
377- data = reinterpret_cast <const uint8_t *>(&buffered_values_) +
378- bit_util::BytesForBits (bit_offset_);
379- // Otherwise, we try straight from buffer (ignoring few bytes that may be cached)
380- } else {
381- max_size = bytes_left ();
382- data = buffer_ + (max_bytes_ - max_size);
383- }
380+ // We read a byte-aligned value, skipping remaining bits.
381+ // Also, we don't bother with the cache since the decoding would be the same.
382+ int max_size = bytes_left ();
383+ const uint8_t * data = buffer_ + (max_bytes_ - max_size);
384384
385385 const auto bytes_read = bit_util::ParseLeadingLEB128 (data, max_size, v);
386386 if (ARROW_PREDICT_FALSE (bytes_read == 0 )) {
387387 // Corrupt LEB128
388388 return false ;
389389 }
390390
391- // Advance for the bytes we have read + the bits we skipped
392- return Advance (( 8 * bytes_read) + (bit_offset_ % 8 ) );
391+ // Advance for the bytes we have read
392+ return AdvanceBytes ( bytes_read);
393393}
394394
395395template <typename Int>
0 commit comments