Skip to content

Commit ea39922

Browse files
committed
Optimize LEB128 data reading
As it turns out, the Rust compiler uses variable-length LEB128-encoded integers internally. The rustc developers spent a fair amount of effort micro-optimizing the decoding functionality [0] [1], as it is in the hot path.

With this change we replace our decoding routines with these optimized ones. To make that happen more easily (and to gain some baseline speedup), we also remove the "shift" return value from the respective methods.

As a result of these changes, we see a respectable speedup:

Before:
> test bench_reading_leb128_unsigned ... bench: 235.83 ns/iter (+/- 32.53)

After:
> test bench_reading_leb128_unsigned ... bench: 157.38 ns/iter (+/- 17.09)

[0] rust-lang/rust#69050
[1] rust-lang/rust#69157

Signed-off-by: Daniel Müller <[email protected]>
1 parent 0c2a60c commit ea39922

File tree

2 files changed

+53
-12
lines changed

2 files changed

+53
-12
lines changed

benches/bench.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,48 @@ use gimli::{
66
AttributeValue, DebugAbbrev, DebugAddr, DebugAddrBase, DebugAranges, DebugInfo, DebugLine,
77
DebugLineOffset, DebugLoc, DebugLocLists, DebugPubNames, DebugPubTypes, DebugRanges,
88
DebugRngLists, Encoding, EndianSlice, EntriesTreeNode, Expression, LittleEndian, LocationLists,
9-
Operation, RangeLists, RangeListsOffset, Reader, ReaderOffset,
9+
Operation, RangeLists, RangeListsOffset, Reader, ReaderOffset, leb128
1010
};
1111
use std::env;
1212
use std::fs::File;
1313
use std::io::Read;
1414
use std::path::PathBuf;
1515
use std::rc::Rc;
1616

17+
#[bench]
18+
fn bench_reading_leb128_unsigned(b: &mut test::Bencher) {
19+
#[rustfmt::skip]
20+
let data = [
21+
([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01], u64::MAX),
22+
([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00], u64::MAX / 2),
23+
([0xd5, 0xaa, 0xd5, 0xaa, 0xd5, 0xaa, 0xd5, 0xaa, 0x55, 0x00], u64::MAX / 3),
24+
([0xb3, 0xe6, 0xcc, 0x99, 0xb3, 0xe6, 0xcc, 0x99, 0x33, 0x00], u64::MAX / 5),
25+
([0xaa, 0xd5, 0xaa, 0xd5, 0xaa, 0xd5, 0xaa, 0xd5, 0x2a, 0x00], u64::MAX / 6),
26+
([0x92, 0xc9, 0xa4, 0x92, 0xc9, 0xa4, 0x92, 0xc9, 0x24, 0x00], u64::MAX / 7),
27+
([0xf1, 0xb8, 0x9c, 0x8e, 0xc7, 0xe3, 0xf1, 0xb8, 0x1c, 0x00], u64::MAX / 9),
28+
([0x99, 0xb3, 0xe6, 0xcc, 0x99, 0xb3, 0xe6, 0xcc, 0x19, 0x00], u64::MAX / 10),
29+
([0xd1, 0x8b, 0xdd, 0xe8, 0xc5, 0xae, 0xf4, 0xa2, 0x17, 0x00], u64::MAX / 11),
30+
([0xd5, 0xaa, 0xd5, 0xaa, 0xd5, 0xaa, 0xd5, 0xaa, 0x15, 0x00], u64::MAX / 12),
31+
([0xb1, 0xa7, 0xec, 0x89, 0xbb, 0xe2, 0xce, 0xd8, 0x13, 0x00], u64::MAX / 13),
32+
([0xc9, 0xa4, 0x92, 0xc9, 0xa4, 0x92, 0xc9, 0xa4, 0x12, 0x00], u64::MAX / 14),
33+
([0x91, 0xa2, 0xc4, 0x88, 0x91, 0xa2, 0xc4, 0x88, 0x11, 0x00], u64::MAX / 15),
34+
];
35+
36+
for (data, expected) in data {
37+
let mut slice = test::black_box(EndianSlice::new(data.as_slice(), LittleEndian));
38+
let v = leb128::read::unsigned(&mut slice).unwrap();
39+
assert_eq!(v, expected);
40+
}
41+
42+
let () = b.iter(|| {
43+
for (data, _) in data {
44+
let mut slice = test::black_box(EndianSlice::new(data.as_slice(), LittleEndian));
45+
let v = leb128::read::unsigned(&mut slice).unwrap();
46+
test::black_box(v);
47+
}
48+
});
49+
}
50+
1751
pub fn read_section(section: &str) -> Vec<u8> {
1852
let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()));
1953
path.push("./fixtures/self/");

src/leb128.rs

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,23 +79,30 @@ pub mod read {
7979

8080
/// Read an unsigned LEB128 number from the given `Reader` and
8181
/// return it or an error if reading failed.
82+
// Slightly adjusted copy of `rustc` implementation:
83+
// https://github.com/rust-lang/rust/blob/be8de5d6a0fc5cb2924e174a809a0aff303f281a/compiler/rustc_serialize/src/leb128.rs#L53
8284
pub fn unsigned<R: Reader>(r: &mut R) -> Result<u64> {
83-
let mut result = 0;
84-
let mut shift = 0;
85-
85+
// The first iteration of this loop is unpeeled. This is a
86+
// performance win because this code is hot and integer values less
87+
// than 128 are very common, typically occurring 50-80% or more of
88+
// the time, even for u64 and u128.
89+
let byte = r.read_u8()?;
90+
if byte & CONTINUATION_BIT == 0 {
91+
return Ok(u64::from(byte));
92+
}
93+
let mut result = u64::from(low_bits_of_byte(byte));
94+
let mut shift = 7;
8695
loop {
8796
let byte = r.read_u8()?;
88-
if shift == 63 && byte != 0x00 && byte != 0x01 {
89-
return Err(Error::BadUnsignedLeb128);
90-
}
91-
92-
let low_bits = u64::from(low_bits_of_byte(byte));
93-
result |= low_bits << shift;
94-
9597
if byte & CONTINUATION_BIT == 0 {
98+
result |= u64::from(byte) << shift;
9699
return Ok(result);
100+
} else {
101+
result |= u64::from(low_bits_of_byte(byte)) << shift;
102+
}
103+
if shift >= 63 {
104+
return Err(Error::BadUnsignedLeb128);
97105
}
98-
99106
shift += 7;
100107
}
101108
}

0 commit comments

Comments
 (0)