Skip to content

Commit 7aed441

Browse files
Ariel Ben-Yehuda (arielb1)
authored and committed
don't emit leading zeros in tagged integers
we *already* need a length, so might as well use it.

This saves 3% in libcore.

559870 liballoc-bb943c5a.rlib
1425170 liballoc_jemalloc-bb943c5a.rlib
10120 liballoc_system-bb943c5a.rlib
152398 libarena-bb943c5a.rlib
4023670 libcollections-bb943c5a.rlib
18042746 libcore-bb943c5a.rlib
198202 libflate-bb943c5a.rlib
244412 libfmt_macros-bb943c5a.rlib
555750 libgetopts-bb943c5a.rlib
222462 libgraphviz-bb943c5a.rlib
417824 liblibc-bb943c5a.rlib
187804 liblog-bb943c5a.rlib
722742 librand-bb943c5a.rlib
604846 librbml-bb943c5a.rlib
1397814 librustc_back-bb943c5a.rlib
38382616 librustc-bb943c5a.rlib
12826 librustc_bitflags-bb943c5a.rlib
2298772 librustc_borrowck-bb943c5a.rlib
570822 librustc_data_structures-bb943c5a.rlib
9361826 librustc_driver-bb943c5a.rlib
9479914 librustc_front-bb943c5a.rlib
1604576 librustc_lint-bb943c5a.rlib
79190586 librustc_llvm-bb943c5a.rlib
4783104 librustc_mir-bb943c5a.rlib
3534332 librustc_platform_intrinsics-bb943c5a.rlib
592710 librustc_privacy-bb943c5a.rlib
3123792 librustc_resolve-bb943c5a.rlib
14183434 librustc_trans-bb943c5a.rlib
11937016 librustc_typeck-bb943c5a.rlib
1830988 librustc_unicode-bb943c5a.rlib
15611582 librustdoc-bb943c5a.rlib
2892482 libserialize-bb943c5a.rlib
8642922 libstd-bb943c5a.rlib
30590774 libsyntax-bb943c5a.rlib
912678 libterm-bb943c5a.rlib
1369754 libtest-bb943c5a.rlib
1 parent 38cd6d4 commit 7aed441

File tree

1 file changed

+44
-29
lines changed

1 file changed

+44
-29
lines changed

src/librbml/lib.rs

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -467,37 +467,44 @@ pub mod reader {
467467
f(&d.data[d.start..d.end])
468468
}
469469

470-
471470
pub fn doc_as_u8(d: Doc) -> u8 {
472471
assert_eq!(d.end, d.start + 1);
473472
d.data[d.start]
474473
}
475474

476-
pub fn doc_as_u16(d: Doc) -> u16 {
477-
assert_eq!(d.end, d.start + 2);
478-
let mut b = [0; 2];
479-
bytes::copy_memory(&d.data[d.start..d.end], &mut b);
480-
unsafe { (*(b.as_ptr() as *const u16)).to_be() }
481-
}
482-
483-
pub fn doc_as_u32(d: Doc) -> u32 {
484-
assert_eq!(d.end, d.start + 4);
485-
let mut b = [0; 4];
486-
bytes::copy_memory(&d.data[d.start..d.end], &mut b);
487-
unsafe { (*(b.as_ptr() as *const u32)).to_be() }
488-
}
489-
490475
pub fn doc_as_u64(d: Doc) -> u64 {
491-
assert_eq!(d.end, d.start + 8);
492-
let mut b = [0; 8];
493-
bytes::copy_memory(&d.data[d.start..d.end], &mut b);
494-
unsafe { (*(b.as_ptr() as *const u64)).to_be() }
476+
if d.end >= 8 {
477+
// For performance, we read 8 big-endian bytes,
478+
// and mask off the junk if there is any. This
479+
// obviously won't work on the first 8 bytes
480+
// of a file - we will fall off the start
481+
// of the page and segfault.
482+
483+
let mut b = [0; 8];
484+
bytes::copy_memory(&d.data[d.end-8..d.end], &mut b);
485+
let data = unsafe { (*(b.as_ptr() as *const u64)).to_be() };
486+
let len = d.end - d.start;
487+
if len < 8 {
488+
data & ((1<<(len*8))-1)
489+
} else {
490+
data
491+
}
492+
} else {
493+
let mut result = 0;
494+
for b in &d.data[d.start..d.end] {
495+
result = (result<<8) + (*b as u64);
496+
}
497+
result
498+
}
495499
}
496500

497-
pub fn doc_as_i8(d: Doc) -> i8 { doc_as_u8(d) as i8 }
498-
pub fn doc_as_i16(d: Doc) -> i16 { doc_as_u16(d) as i16 }
499-
pub fn doc_as_i32(d: Doc) -> i32 { doc_as_u32(d) as i32 }
500-
pub fn doc_as_i64(d: Doc) -> i64 { doc_as_u64(d) as i64 }
501+
#[inline] pub fn doc_as_u16(d: Doc) -> u16 { doc_as_u64(d) as u16 }
502+
#[inline] pub fn doc_as_u32(d: Doc) -> u32 { doc_as_u64(d) as u32 }
503+
504+
#[inline] pub fn doc_as_i8(d: Doc) -> i8 { doc_as_u8(d) as i8 }
505+
#[inline] pub fn doc_as_i16(d: Doc) -> i16 { doc_as_u16(d) as i16 }
506+
#[inline] pub fn doc_as_i32(d: Doc) -> i32 { doc_as_u32(d) as i32 }
507+
#[inline] pub fn doc_as_i64(d: Doc) -> i64 { doc_as_u64(d) as i64 }
501508

502509
pub struct Decoder<'a> {
503510
parent: Doc<'a>,
@@ -996,35 +1003,43 @@ pub mod writer {
9961003

9971004
pub fn wr_tagged_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
9981005
let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
999-
self.wr_tagged_bytes(tag_id, &bytes)
1006+
// tagged integers are emitted in big-endian, with no
1007+
// leading zeros.
1008+
let leading_zero_bytes = v.leading_zeros()/8;
1009+
self.wr_tagged_bytes(tag_id, &bytes[leading_zero_bytes as usize..])
10001010
}
10011011

1002-
pub fn wr_tagged_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult{
1003-
let bytes: [u8; 4] = unsafe { mem::transmute(v.to_be()) };
1004-
self.wr_tagged_bytes(tag_id, &bytes)
1012+
#[inline]
1013+
pub fn wr_tagged_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult {
1014+
self.wr_tagged_u64(tag_id, v as u64)
10051015
}
10061016

1017+
#[inline]
10071018
pub fn wr_tagged_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
1008-
let bytes: [u8; 2] = unsafe { mem::transmute(v.to_be()) };
1009-
self.wr_tagged_bytes(tag_id, &bytes)
1019+
self.wr_tagged_u64(tag_id, v as u64)
10101020
}
10111021

1022+
#[inline]
10121023
pub fn wr_tagged_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
10131024
self.wr_tagged_bytes(tag_id, &[v])
10141025
}
10151026

1027+
#[inline]
10161028
pub fn wr_tagged_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
10171029
self.wr_tagged_u64(tag_id, v as u64)
10181030
}
10191031

1032+
#[inline]
10201033
pub fn wr_tagged_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
10211034
self.wr_tagged_u32(tag_id, v as u32)
10221035
}
10231036

1037+
#[inline]
10241038
pub fn wr_tagged_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
10251039
self.wr_tagged_u16(tag_id, v as u16)
10261040
}
10271041

1042+
#[inline]
10281043
pub fn wr_tagged_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
10291044
self.wr_tagged_bytes(tag_id, &[v as u8])
10301045
}

0 commit comments

Comments
 (0)