Skip to content

Commit 5d666a2

Browse files
committed
Optimize RLE+ decoding by 20%
```
decode                  time:   [15.350 us 15.388 us 15.440 us]
                        change: [-19.388% -19.130% -18.910%] (p = 0.00 < 0.05)
                        Performance has improved.

decode_encode           time:   [30.458 us 30.542 us 30.659 us]
                        change: [-13.335% -13.014% -12.640%] (p = 0.00 < 0.05)
                        Performance has improved.
```

Signed-off-by: Jakub Sztandera <[email protected]>
1 parent 8b37245 commit 5d666a2

File tree

1 file changed

+45
-37
lines changed

1 file changed

+45
-37
lines changed

ipld/bitfield/src/rleplus/reader.rs

Lines changed: 45 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,13 @@ const VARINT_MAX_BYTES: usize = 10;
99
/// A `BitReader` allows for efficiently reading bits from a byte buffer, up to a byte at a time.
1010
///
1111
/// It works by always storing at least the next 8 bits in `bits`, which lets us conveniently
12-
/// and efficiently read bits that cross a byte boundary. It's filled with the bits from `next_byte`
13-
/// after every read operation, which is in turn replaced by the next byte from `bytes` as soon
14-
/// as the next read might read bits from `next_byte`.
12+
/// and efficiently read bits that cross a byte boundary.
1513
pub struct BitReader<'a> {
1614
/// The bytes that have not been read from yet.
1715
bytes: &'a [u8],
18-
/// The next byte from `bytes` to be added to `bits`.
19-
next_byte: u8,
2016
/// The next bits to be read.
21-
bits: u16,
17+
bits: u64,
18+
2219
/// The number of bits in `bits` from bytes that came before the first byte of `bytes` (at least 8, at most 16).
2320
num_bits: u32,
2421
}
@@ -31,51 +28,60 @@ impl<'a> BitReader<'a> {
3128
if bytes.last() == Some(&0) {
3229
return Err(Error::NotMinimal);
3330
}
31+
let mut bits = 0u64;
32+
for i in 0..2 {
33+
let byte = bytes.get(i).unwrap_or(&0);
34+
bits |= (*byte as u64) << (8*i);
35+
}
3436

35-
let &byte1 = bytes.get(0).unwrap_or(&0);
36-
let &byte2 = bytes.get(1).unwrap_or(&0);
37-
let bytes = if bytes.len() > 2 { &bytes[2..] } else { &[] };
37+
let bytes = bytes.get(2..).unwrap_or(&[]);
3838

3939
Ok(Self {
4040
bytes,
41-
bits: byte1 as u16,
42-
next_byte: byte2,
43-
num_bits: 8,
41+
bits,
42+
num_bits: 16,
4443
})
4544
}
4645

47-
/// Reads a given number of bits from the buffer. Will keep returning 0 once
46+
/// Peeks a given number of bits from the buffer. Will keep returning 0 once
4847
/// the buffer has been exhausted.
49-
pub fn read(&mut self, num_bits: u32) -> u8 {
48+
pub fn peek(&self, num_bits: u32) -> u8 {
5049
debug_assert!(num_bits <= 8);
5150

5251
// creates a mask with a `num_bits` number of 1s in order
5352
// to get only the bits we need from `self.bits`
5453
let mask = (1 << num_bits) - 1;
55-
let res = (self.bits & mask) as u8;
54+
(self.bits & mask) as u8
55+
}
5656

57-
// removes the bits we've just read from local storage
58-
// because we don't need them anymore
57+
/// Drops a number of bits from the buffer
58+
pub fn drop(&mut self, num_bits: u32) {
59+
debug_assert!(num_bits <= 8);
60+
61+
// removes the bits
5962
self.bits >>= num_bits;
6063
self.num_bits -= num_bits;
6164

62-
// this unconditionally adds the next byte to `bits`,
63-
// regardless of whether there's enough space or not. the
64-
// point is to make sure that `bits` always contains
65-
// at least the next 8 bits to be read
66-
self.bits |= (self.next_byte as u16) << self.num_bits;
6765

68-
// if fewer than 8 bits remain, we increment `self.num_bits`
69-
// to include the bits from `next_byte` (which is already
70-
// contained in `bits`) and we update `next_byte` with the
71-
// data to be read after that
66+
// not sure why this being outside of the if improves the performance
67+
// but it does, probably related to keeping caches warm
68+
let byte = self.bytes.get(0).unwrap_or(&0);
69+
self.bits |= (*byte as u64) << self.num_bits;
70+
71+
// if fewer than 8 bits remain, we skip to loading the next byte
7272
if self.num_bits < 8 {
7373
self.num_bits += 8;
74-
75-
let (&next_byte, bytes) = self.bytes.split_first().unwrap_or((&0, &[]));
76-
self.next_byte = next_byte;
77-
self.bytes = bytes;
74+
self.bytes = self.bytes.get(1..).unwrap_or(&[]);
7875
}
76+
}
77+
78+
/// Reads a given number of bits from the buffer. Will keep returning 0 once
79+
/// the buffer has been exhausted.
80+
pub fn read(&mut self, num_bits: u32) -> u8 {
81+
debug_assert!(num_bits <= 8);
82+
83+
let res = self.peek(num_bits);
84+
self.drop(num_bits);
7985

8086
res
8187
}
@@ -114,22 +120,24 @@ impl<'a> BitReader<'a> {
114120
return Ok(None);
115121
}
116122

117-
let prefix_0 = self.read(1);
118-
let len = if prefix_0 == 1 {
123+
let peek6 = self.peek(6);
124+
125+
let len = if peek6 & 0b1 != 0 {
119126
// Block Single (prefix 1)
127+
self.drop(1);
120128
1
121129
} else {
122-
let prefix_1 = self.read(1);
123-
124-
if prefix_1 == 1 {
130+
if peek6 & 0b10 != 0 {
125131
// Block Short (prefix 01)
126-
let val = self.read(4) as u64;
132+
let val = ((peek6 >> 2) & 0x0f) as u64;
133+
self.drop(6);
127134
if val < 2 {
128135
return Err(Error::NotMinimal);
129136
}
130137
val
131138
} else {
132139
// Block Long (prefix 00)
140+
self.drop(2);
133141
let val = self.read_varint()?;
134142
if val < 16 {
135143
return Err(Error::NotMinimal);
@@ -143,7 +151,7 @@ impl<'a> BitReader<'a> {
143151

144152
/// Returns true if there are more non-zero bits to be read.
145153
pub fn has_more(&self) -> bool {
146-
self.bits > 0 || self.next_byte > 0 || !self.bytes.is_empty()
154+
self.bits != 0 || !self.bytes.is_empty()
147155
}
148156
}
149157

0 commit comments

Comments
 (0)