don't emit leading zeros in tagged integers

Ariel Ben-Yehuda · arielb1 · commit 7aed441ac8ef · 2015-10-01T18:21:04.000+03:00
We *already* need a length, so we might as well use it. This saves 3% in libcore. rlib sizes: 559870 liballoc-bb943c5a.rlib 1425170 liballoc_jemalloc-bb943c5a.rlib 10120 liballoc_system-bb943c5a.rlib 152398 libarena-bb943c5a.rlib 4023670 libcollections-bb943c5a.rlib 18042746 libcore-bb943c5a.rlib 198202 libflate-bb943c5a.rlib 244412 libfmt_macros-bb943c5a.rlib 555750 libgetopts-bb943c5a.rlib 222462 libgraphviz-bb943c5a.rlib 417824 liblibc-bb943c5a.rlib 187804 liblog-bb943c5a.rlib 722742 librand-bb943c5a.rlib 604846 librbml-bb943c5a.rlib 1397814 librustc_back-bb943c5a.rlib 38382616 librustc-bb943c5a.rlib 12826 librustc_bitflags-bb943c5a.rlib 2298772 librustc_borrowck-bb943c5a.rlib 570822 librustc_data_structures-bb943c5a.rlib 9361826 librustc_driver-bb943c5a.rlib 9479914 librustc_front-bb943c5a.rlib 1604576 librustc_lint-bb943c5a.rlib 79190586 librustc_llvm-bb943c5a.rlib 4783104 librustc_mir-bb943c5a.rlib 3534332 librustc_platform_intrinsics-bb943c5a.rlib 592710 librustc_privacy-bb943c5a.rlib 3123792 librustc_resolve-bb943c5a.rlib 14183434 librustc_trans-bb943c5a.rlib 11937016 librustc_typeck-bb943c5a.rlib 1830988 librustc_unicode-bb943c5a.rlib 15611582 librustdoc-bb943c5a.rlib 2892482 libserialize-bb943c5a.rlib 8642922 libstd-bb943c5a.rlib 30590774 libsyntax-bb943c5a.rlib 912678 libterm-bb943c5a.rlib 1369754 libtest-bb943c5a.rlib
diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs
@@ -467,37 +467,44 @@ pub mod reader {
         f(&d.data[d.start..d.end])
     }
 
-
     pub fn doc_as_u8(d: Doc) -> u8 {
         assert_eq!(d.end, d.start + 1);
         d.data[d.start]
     }
 
-    pub fn doc_as_u16(d: Doc) -> u16 {
-        assert_eq!(d.end, d.start + 2);
-        let mut b = [0; 2];
-        bytes::copy_memory(&d.data[d.start..d.end], &mut b);
-        unsafe { (*(b.as_ptr() as *const u16)).to_be() }
-    }
-
-    pub fn doc_as_u32(d: Doc) -> u32 {
-        assert_eq!(d.end, d.start + 4);
-        let mut b = [0; 4];
-        bytes::copy_memory(&d.data[d.start..d.end], &mut b);
-        unsafe { (*(b.as_ptr() as *const u32)).to_be() }
-    }
-
     pub fn doc_as_u64(d: Doc) -> u64 {
-        assert_eq!(d.end, d.start + 8);
-        let mut b = [0; 8];
-        bytes::copy_memory(&d.data[d.start..d.end], &mut b);
-        unsafe { (*(b.as_ptr() as *const u64)).to_be() }
+        if d.end >= 8 {
+            // For performance, we read 8 big-endian bytes,
+            // and mask off the junk if there is any. This
+            // obviously won't work on the first 8 bytes
+            // of a file - we will fall off the start
+            // of the page and segfault.
+
+            let mut b = [0; 8];
+            bytes::copy_memory(&d.data[d.end-8..d.end], &mut b);
+            let data = unsafe { (*(b.as_ptr() as *const u64)).to_be() };
+            let len = d.end - d.start;
+            if len < 8 {
+                data & ((1<<(len*8))-1)
+            } else {
+                data
+            }
+        } else {
+            let mut result = 0;
+            for b in &d.data[d.start..d.end] {
+                result = (result<<8) + (*b as u64);
+            }
+            result
+        }
     }
 
-    pub fn doc_as_i8(d: Doc) -> i8 { doc_as_u8(d) as i8 }
-    pub fn doc_as_i16(d: Doc) -> i16 { doc_as_u16(d) as i16 }
-    pub fn doc_as_i32(d: Doc) -> i32 { doc_as_u32(d) as i32 }
-    pub fn doc_as_i64(d: Doc) -> i64 { doc_as_u64(d) as i64 }
+    #[inline] pub fn doc_as_u16(d: Doc) -> u16 { doc_as_u64(d) as u16 }
+    #[inline] pub fn doc_as_u32(d: Doc) -> u32 { doc_as_u64(d) as u32 }
+
+    #[inline] pub fn doc_as_i8(d: Doc) -> i8 { doc_as_u8(d) as i8 }
+    #[inline] pub fn doc_as_i16(d: Doc) -> i16 { doc_as_u16(d) as i16 }
+    #[inline] pub fn doc_as_i32(d: Doc) -> i32 { doc_as_u32(d) as i32 }
+    #[inline] pub fn doc_as_i64(d: Doc) -> i64 { doc_as_u64(d) as i64 }
 
     pub struct Decoder<'a> {
         parent: Doc<'a>,
@@ -996,35 +1003,43 @@ pub mod writer {
 
         pub fn wr_tagged_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
             let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
-            self.wr_tagged_bytes(tag_id, &bytes)
+            // tagged integers are emitted in big-endian, with no
+            // leading zeros.
+            let leading_zero_bytes = v.leading_zeros()/8;
+            self.wr_tagged_bytes(tag_id, &bytes[leading_zero_bytes as usize..])
         }
 
-        pub fn wr_tagged_u32(&mut self, tag_id: usize, v: u32)  -> EncodeResult{
-            let bytes: [u8; 4] = unsafe { mem::transmute(v.to_be()) };
-            self.wr_tagged_bytes(tag_id, &bytes)
+        #[inline]
+        pub fn wr_tagged_u32(&mut self, tag_id: usize, v: u32)  -> EncodeResult {
+            self.wr_tagged_u64(tag_id, v as u64)
         }
 
+        #[inline]
         pub fn wr_tagged_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
-            let bytes: [u8; 2] = unsafe { mem::transmute(v.to_be()) };
-            self.wr_tagged_bytes(tag_id, &bytes)
+            self.wr_tagged_u64(tag_id, v as u64)
         }
 
+        #[inline]
         pub fn wr_tagged_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
             self.wr_tagged_bytes(tag_id, &[v])
         }
 
+        #[inline]
         pub fn wr_tagged_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
             self.wr_tagged_u64(tag_id, v as u64)
         }
 
+        #[inline]
         pub fn wr_tagged_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
             self.wr_tagged_u32(tag_id, v as u32)
         }
 
+        #[inline]
         pub fn wr_tagged_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
             self.wr_tagged_u16(tag_id, v as u16)
         }
 
+        #[inline]
         pub fn wr_tagged_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
             self.wr_tagged_bytes(tag_id, &[v as u8])
         }