Skip to content

Commit d1bb72b

Browse files
authored
Merge pull request #443 from filecoin-project/optim/rle2
Optimize RLE+ decoding by 20%
2 parents 3fae757 + c5afcea commit d1bb72b

File tree

3 files changed

+107
-66
lines changed

3 files changed

+107
-66
lines changed

ipld/bitfield/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,15 @@ rand_xorshift = "0.2.0"
2020
rand = "0.7.3"
2121
criterion = "0.3"
2222
serde_json = "1.0"
23+
gperftools = "0.2.0"
2324

2425
[features]
2526
json = []
2627
enable-arbitrary = ["arbitrary"]
2728

29+
[lib]
30+
bench = false
31+
2832
[[bench]]
2933
name = "benchmarks"
3034
harness = false

ipld/bitfield/benches/benchmarks/main.rs

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,34 @@
33

44
mod examples;
55

6+
use std::fs;
7+
use std::path::Path;
8+
69
use criterion::{black_box, criterion_group, criterion_main, Criterion};
710
use examples::{example1, example2};
811
use fvm_ipld_bitfield::BitField;
12+
use gperftools::profiler::PROFILER;
13+
14+
struct Profiler;
15+
16+
impl criterion::profiler::Profiler for Profiler {
17+
fn start_profiling(&mut self, benchmark_id: &str, benchmark_dir: &Path) {
18+
fs::create_dir_all(benchmark_dir).unwrap();
19+
let bench_file = benchmark_id.to_owned() + ".prof";
20+
let bench_path = benchmark_dir.join(bench_file);
21+
let bench_str = bench_path.to_str().unwrap();
22+
23+
PROFILER.lock().unwrap().start(bench_str).unwrap();
24+
}
25+
26+
fn stop_profiling(&mut self, _: &str, _: &Path) {
27+
PROFILER.lock().unwrap().stop().unwrap();
28+
}
29+
}
30+
31+
fn profiled() -> Criterion {
32+
Criterion::default().with_profiler(Profiler {})
33+
}
934

1035
fn len(c: &mut Criterion) {
1136
let bf = example1();
@@ -24,11 +49,13 @@ fn new(c: &mut Criterion) {
2449
}
2550

2651
fn decode_encode(c: &mut Criterion) {
27-
let bf = example1();
2852
c.bench_function("decode_encode", |b| {
29-
b.iter(|| BitField::from_ranges(bf.ranges()))
53+
b.iter(|| BitField::from_ranges(example1().ranges()))
3054
});
3155
}
56+
fn decode(c: &mut Criterion) {
57+
c.bench_function("decode", |b| b.iter(example1));
58+
}
3259

3360
fn from_ranges(c: &mut Criterion) {
3461
let vec: Vec<_> = example1().ranges().collect();
@@ -95,20 +122,23 @@ fn get(c: &mut Criterion) {
95122
}
96123

97124
criterion_group!(
98-
benches,
99-
len,
100-
bits,
101-
new,
102-
decode_encode,
103-
from_ranges,
104-
is_empty,
105-
intersection,
106-
union,
107-
difference,
108-
symmetric_difference,
109-
cut,
110-
contains_all,
111-
contains_any,
112-
get,
125+
name = benches;
126+
config = profiled();
127+
targets =
128+
len,
129+
bits,
130+
new,
131+
decode,
132+
decode_encode,
133+
from_ranges,
134+
is_empty,
135+
intersection,
136+
union,
137+
difference,
138+
symmetric_difference,
139+
cut,
140+
contains_all,
141+
contains_any,
142+
get,
113143
);
114144
criterion_main!(benches);

ipld/bitfield/src/rleplus/reader.rs

Lines changed: 56 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,13 @@ const VARINT_MAX_BYTES: usize = 10;
99
/// A `BitReader` allows for efficiently reading bits from a byte buffer, up to a byte at a time.
1010
///
1111
/// It works by always storing at least the next 8 bits in `bits`, which lets us conveniently
12-
/// and efficiently read bits that cross a byte boundary. It's filled with the bits from `next_byte`
13-
/// after every read operation, which is in turn replaced by the next byte from `bytes` as soon
14-
/// as the next read might read bits from `next_byte`.
12+
/// and efficiently read bits that cross a byte boundary.
1513
pub struct BitReader<'a> {
1614
/// The bytes that have not been read from yet.
1715
bytes: &'a [u8],
18-
/// The next byte from `bytes` to be added to `bits`.
19-
next_byte: u8,
2016
/// The next bits to be read.
21-
bits: u16,
17+
bits: u64,
18+
2219
/// The number of bits in `bits` that came from bytes already taken out of `bytes` (at least 8, at most 16).
2320
num_bits: u32,
2421
}
@@ -31,51 +28,61 @@ impl<'a> BitReader<'a> {
3128
if bytes.last() == Some(&0) {
3229
return Err(Error::NotMinimal);
3330
}
31+
let mut bits = 0u64;
32+
for i in 0..2 {
33+
let byte = bytes.get(i).unwrap_or(&0);
34+
bits |= (*byte as u64) << (8 * i);
35+
}
3436

35-
let &byte1 = bytes.get(0).unwrap_or(&0);
36-
let &byte2 = bytes.get(1).unwrap_or(&0);
37-
let bytes = if bytes.len() > 2 { &bytes[2..] } else { &[] };
37+
let bytes = bytes.get(2..).unwrap_or(&[]);
3838

3939
Ok(Self {
4040
bytes,
41-
bits: byte1 as u16,
42-
next_byte: byte2,
43-
num_bits: 8,
41+
bits,
42+
num_bits: 16,
4443
})
4544
}
4645

47-
/// Reads a given number of bits from the buffer. Will keep returning 0 once
46+
/// Peeks a given number of bits from the buffer. Will keep returning 0 once
4847
/// the buffer has been exhausted.
49-
pub fn read(&mut self, num_bits: u32) -> u8 {
48+
#[inline(always)]
49+
pub fn peek(&self, num_bits: u32) -> u8 {
5050
debug_assert!(num_bits <= 8);
5151

5252
// creates a mask with a `num_bits` number of 1s in order
5353
// to get only the bits we need from `self.bits`
5454
let mask = (1 << num_bits) - 1;
55-
let res = (self.bits & mask) as u8;
55+
(self.bits & mask) as u8
56+
}
5657

57-
// removes the bits we've just read from local storage
58-
// because we don't need them anymore
58+
/// Drops a number of bits from the buffer
59+
#[inline(always)]
60+
pub fn drop(&mut self, num_bits: u32) {
61+
debug_assert!(num_bits <= 8);
62+
63+
// removes the bits
5964
self.bits >>= num_bits;
6065
self.num_bits -= num_bits;
6166

62-
// this unconditionally adds the next byte to `bits`,
63-
// regardless of whether there's enough space or not. the
64-
// point is to make sure that `bits` always contains
65-
// at least the next 8 bits to be read
66-
self.bits |= (self.next_byte as u16) << self.num_bits;
67+
// not sure why this being outside of the if improves the performance
68+
// but it does, probably related to keeping caches warm
69+
let byte = self.bytes.first().unwrap_or(&0);
70+
self.bits |= (*byte as u64) << self.num_bits;
6771

68-
// if fewer than 8 bits remain, we increment `self.num_bits`
69-
// to include the bits from `next_byte` (which is already
70-
// contained in `bits`) and we update `next_byte` with the
71-
// data to be read after that
72+
// if fewer than 8 bits remain, we skip to loading the next byte
7273
if self.num_bits < 8 {
7374
self.num_bits += 8;
74-
75-
let (&next_byte, bytes) = self.bytes.split_first().unwrap_or((&0, &[]));
76-
self.next_byte = next_byte;
77-
self.bytes = bytes;
75+
self.bytes = self.bytes.get(1..).unwrap_or(&[]);
7876
}
77+
}
78+
79+
/// Reads a given number of bits from the buffer. Will keep returning 0 once
80+
/// the buffer has been exhausted.
81+
pub fn read(&mut self, num_bits: u32) -> u8 {
82+
debug_assert!(num_bits <= 8);
83+
84+
let res = self.peek(num_bits);
85+
self.drop(num_bits);
7986

8087
res
8188
}
@@ -114,36 +121,36 @@ impl<'a> BitReader<'a> {
114121
return Ok(None);
115122
}
116123

117-
let prefix_0 = self.read(1);
118-
let len = if prefix_0 == 1 {
124+
let peek6 = self.peek(6);
125+
126+
let len = if peek6 & 0b01 != 0 {
119127
// Block Single (prefix 1)
128+
self.drop(1);
120129
1
130+
} else if peek6 & 0b10 != 0 {
131+
// Block Short (prefix 01)
132+
let val = ((peek6 >> 2) & 0x0f) as u64;
133+
self.drop(6);
134+
if val < 2 {
135+
return Err(Error::NotMinimal);
136+
}
137+
val
121138
} else {
122-
let prefix_1 = self.read(1);
123-
124-
if prefix_1 == 1 {
125-
// Block Short (prefix 01)
126-
let val = self.read(4) as u64;
127-
if val < 2 {
128-
return Err(Error::NotMinimal);
129-
}
130-
val
131-
} else {
132-
// Block Long (prefix 00)
133-
let val = self.read_varint()?;
134-
if val < 16 {
135-
return Err(Error::NotMinimal);
136-
}
137-
val
139+
// Block Long (prefix 00)
140+
self.drop(2);
141+
let val = self.read_varint()?;
142+
if val < 16 {
143+
return Err(Error::NotMinimal);
138144
}
145+
val
139146
};
140147

141148
Ok(Some(len))
142149
}
143150

144151
/// Returns true if there are more non-zero bits to be read.
145152
pub fn has_more(&self) -> bool {
146-
self.bits > 0 || self.next_byte > 0 || !self.bytes.is_empty()
153+
self.bits != 0 || !self.bytes.is_empty()
147154
}
148155
}
149156

0 commit comments

Comments
 (0)