Skip to content

Commit b3297fb

Browse files
Merge pull request #76 from frankmcsherry/decode_improvements
Decode improvements
2 parents e29e702 + 55f1a94 commit b3297fb

File tree

11 files changed

+219
-8
lines changed

11 files changed

+219
-8
lines changed

columnar_derive/src/lib.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,10 +317,20 @@ fn derive_struct(name: &syn::Ident, generics: &syn::Generics, data_struct: syn::
317317

318318
quote! {
319319
// Generated `FromBytes` impl for the derived container struct.
// Every field container is rebuilt from the incoming byte slices, in the order
// in which `#names` / `#container_types` are expanded.
impl #impl_gen ::columnar::FromBytes<'columnar> for #c_ident #ty_gen #where_clause {
// The struct consumes the sum of the slice counts of all of its field containers.
320+
const SLICE_COUNT: usize = 0 #(+ <#container_types>::SLICE_COUNT)*;
320321
#[inline(always)]
// Iterator-based reconstruction: each field pulls what it needs from the shared
// iterator, one field after another.
321322
fn from_bytes(bytes: &mut impl Iterator<Item=&'columnar [u8]>) -> Self {
322323
Self { #(#names: ::columnar::FromBytes::from_bytes(bytes),)* }
323324
}
325+
#[inline(always)]
// Slice-based reconstruction: each field is handed its own sub-range of `bytes`.
// Panics (slice indexing) if `bytes` holds fewer than `Self::SLICE_COUNT` entries.
326+
fn from_byte_slices(bytes: &[&'columnar [u8]]) -> Self {
// Running start position of the next field's sub-range within `bytes`.
327+
let mut _offset = 0;
328+
#(
// Hand this field exactly `SLICE_COUNT` slices, then advance past them.
329+
let #names = <#container_types>::from_byte_slices(&bytes[_offset .. _offset + <#container_types>::SLICE_COUNT]);
330+
_offset += <#container_types>::SLICE_COUNT;
331+
)*
332+
Self { #(#names,)* }
333+
}
324334
}
325335
}
326336
};
@@ -500,10 +510,15 @@ fn derive_unit_struct(name: &syn::Ident, _generics: &syn::Generics, vis: syn::Vi
500510
}
501511

502512
// Generated `FromBytes` impl for the borrowed form of a unit-struct container,
// whose only state is a reference to a `count` value.
impl<'columnar> ::columnar::FromBytes<'columnar> for #c_ident <&'columnar u64> {
// Exactly one byte slice (the one holding `count`) is consumed.
513+
const SLICE_COUNT: usize = 1;
503514
#[inline(always)]
// Takes the next slice from the iterator, casts it with `bytemuck::try_cast_slice`,
// and borrows its first element as `count`.
// Panics if the iterator is exhausted, the cast fails, or the cast slice is empty.
504515
fn from_bytes(bytes: &mut impl Iterator<Item=&'columnar [u8]>) -> Self {
505516
Self { count: &::columnar::bytemuck::try_cast_slice(bytes.next().unwrap()).unwrap()[0] }
506517
}
518+
#[inline(always)]
// Same as `from_bytes`, but reads the slice at position 0 of `bytes` directly.
// Panics if `bytes` is empty, the cast fails, or the cast slice is empty.
519+
fn from_byte_slices(bytes: &[&'columnar [u8]]) -> Self {
520+
Self { count: &::columnar::bytemuck::try_cast_slice(bytes[0]).unwrap()[0] }
521+
}
507522
}
508523

509524
impl ::columnar::Columnar for #name {
@@ -884,6 +899,7 @@ fn derive_enum(name: &syn::Ident, generics: &syn:: Generics, data_enum: syn::Dat
884899
quote! {
885900
#[allow(non_snake_case)]
886901
impl #impl_gen ::columnar::FromBytes<'columnar> for #c_ident #ty_gen #where_clause {
902+
const SLICE_COUNT: usize = 0 #(+ <#container_types>::SLICE_COUNT)* + CVar::SLICE_COUNT + COff::SLICE_COUNT;
887903
#[inline(always)]
888904
fn from_bytes(bytes: &mut impl Iterator<Item=&'columnar [u8]>) -> Self {
889905
Self {
@@ -892,6 +908,18 @@ fn derive_enum(name: &syn::Ident, generics: &syn:: Generics, data_enum: syn::Dat
892908
offset: ::columnar::FromBytes::from_bytes(bytes),
893909
}
894910
}
911+
// Slice-based reconstruction for the derived enum container: the variant containers
// come first, followed by the `variant` container and then the `offset` container.
#[inline(always)]
912+
fn from_byte_slices(bytes: &[&'columnar [u8]]) -> Self {
// Running start position of the next component's sub-range within `bytes`.
913+
let mut _offset = 0;
914+
#(
// Each variant container receives exactly its own `SLICE_COUNT` slices.
915+
let #names = <#container_types>::from_byte_slices(&bytes[_offset .. _offset + <#container_types>::SLICE_COUNT]);
916+
_offset += <#container_types>::SLICE_COUNT;
917+
)*
918+
let variant = CVar::from_byte_slices(&bytes[_offset .. _offset + CVar::SLICE_COUNT]);
919+
_offset += CVar::SLICE_COUNT;
// NOTE(review): unlike every component above, `COff` is given the whole remainder of
// `bytes` rather than exactly `COff::SLICE_COUNT` slices. The two are the same only when
// the caller passes exactly `Self::SLICE_COUNT` slices; with a longer input, `COff` sees
// the extra trailing slices too — confirm this is intended.
920+
let offset = COff::from_byte_slices(&bytes[_offset ..]);
921+
Self { #(#names,)* variant, offset }
922+
}
895923
}
896924
}
897925
};
@@ -1153,10 +1181,15 @@ fn derive_tags(name: &syn::Ident, _generics: &syn:: Generics, data_enum: syn::Da
11531181
}
11541182

11551183
// Generated `FromBytes` impl for the tag-only enum container, which wraps a single
// `variant` container of type `CVar` and delegates everything to it.
impl<'columnar, CVar: ::columnar::FromBytes<'columnar>> ::columnar::FromBytes<'columnar> for #c_ident <CVar> {
// Consumes exactly as many slices as the wrapped `variant` container.
1184+
const SLICE_COUNT: usize = CVar::SLICE_COUNT;
11561185
#[inline(always)]
// Iterator-based reconstruction: `variant` pulls its slices from `bytes`.
11571186
fn from_bytes(bytes: &mut impl Iterator<Item=&'columnar [u8]>) -> Self {
11581187
Self { variant: ::columnar::FromBytes::from_bytes(bytes) }
11591188
}
1189+
#[inline(always)]
// Slice-based reconstruction: `bytes` is forwarded unchanged to `CVar`.
1190+
fn from_byte_slices(bytes: &[&'columnar [u8]]) -> Self {
1191+
Self { variant: CVar::from_byte_slices(bytes) }
1192+
}
11601193
}
11611194

11621195
impl ::columnar::Columnar for #name {

src/arc.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ impl<'a, T: AsBytes<'a>> AsBytes<'a> for Arc<T> {
2323
#[inline(always)] fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> { self.as_ref().as_bytes() }
2424
}
2525
// `Arc<T>` reconstructs the inner `T` and wraps it in a freshly allocated `Arc`.
impl<'a, T: FromBytes<'a>> FromBytes<'a> for Arc<T> {
// The wrapper adds no slices of its own.
26+
const SLICE_COUNT: usize = T::SLICE_COUNT;
2627
#[inline(always)] fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self { Arc::new(T::from_bytes(bytes)) }
// Slice-based counterpart of `from_bytes`; `bytes` is forwarded unchanged to `T`.
28+
#[inline(always)] fn from_byte_slices(bytes: &[&'a [u8]]) -> Self { Arc::new(T::from_byte_slices(bytes)) }
2729
}
2830

2931
#[cfg(test)]

src/boxed.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ impl<'a, C: AsBytes<'a>> AsBytes<'a> for Boxed<C> {
5959
#[inline(always)] fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> { self.0.as_bytes() }
6060
}
6161
// `Boxed<C>` reconstructs the inner container `C` and wraps it in the tuple struct.
impl<'a, C: FromBytes<'a>> FromBytes<'a> for Boxed<C> {
// The wrapper adds no slices of its own.
62+
const SLICE_COUNT: usize = C::SLICE_COUNT;
6263
#[inline(always)] fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self { Self(C::from_bytes(bytes)) }
// Slice-based counterpart of `from_bytes`; `bytes` is forwarded unchanged to `C`.
64+
#[inline(always)] fn from_byte_slices(bytes: &[&'a [u8]]) -> Self { Self(C::from_byte_slices(bytes)) }
6365
}
6466
impl<C: Index> Index for Boxed<C> {
6567
type Ref = Boxed<C::Ref>;

src/bytes.rs

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -243,20 +243,26 @@ pub mod serialization_neu {
243243
}
244244

245245
/// Decodes an encoded sequence of byte slices. Each result will be `u64` aligned.
///
/// `store[0]` is read as a byte offset: dividing it by 8 and subtracting one gives the
/// number of slices, and the first `slices + 1` words of `store` are used as the offset
/// index from which each slice's bounds are taken.
///
/// # Panics
///
/// Panics if `store` is empty, if the index does not fit in `store`, or if the final
/// index entry exceeds the byte length of `store`.
// NOTE(review): gutter entries ending in `-` appear to mark the pre-change lines of this
// diff and entries ending in `+` the replacement lines — confirm against the commit.
246+
#[inline(always)]
246247
pub fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
247-
assert!(store[0] % 8 == 0);
248-
let slices = (store[0] / 8) - 1;
249-
(0 .. slices).map(|i| decode_index(store, i))
// NOTE(review): `store[0] < 8` makes the `- 1` underflow, and the replacement lines no
// longer assert that `store[0]` is a multiple of 8 — confirm callers guarantee both.
248+
let slices = store[0] as usize / 8 - 1;
// The offset index: `slices + 1` words, entry `i` / `i + 1` bounding slice `i`.
249+
let index = &store[..slices + 1];
// The final index entry bounds the whole payload in bytes.
250+
let last = index[slices] as usize;
// View the store as bytes once, truncated to `last`, instead of once per slice.
251+
let bytes: &[u8] = &bytemuck::cast_slice(store)[..last];
252+
(0 .. slices).map(move |i| {
// Clamp so that `lower <= upper <= last`; the range below can then never be out of bounds.
253+
let upper = (index[i + 1] as usize).min(last);
// `(x + 7) & !7` rounds the start offset up to the next multiple of 8.
254+
let lower = (((index[i] as usize) + 7) & !7).min(upper);
255+
&bytes[lower .. upper]
256+
})
250257
}
251258

252259
/// Decodes a specific byte slice by index. It will be `u64` aligned.
///
/// The bounds of the slice are read from `store[index]` (start, rounded up to a multiple
/// of 8) and `store[index + 1]` (end), both interpreted as byte offsets into `store`.
///
/// # Panics
///
/// Panics if `index + 1` is not a valid position in `store`.
// NOTE(review): gutter entries ending in `-` appear to mark the pre-change lines of this
// diff and entries ending in `+` the replacement lines — confirm against the commit.
253260
#[inline(always)]
254261
pub fn decode_index(store: &[u64], index: u64) -> &[u8] {
255-
debug_assert!(index + 1 < store[0]/8);
256-
let index: usize = index.try_into().unwrap();
257-
let lower: usize = ((store[index] + 7) & !7).try_into().unwrap();
258-
let upper: usize = (store[index + 1]).try_into().unwrap();
259-
let bytes: &[u8] = bytemuck::try_cast_slice(store).expect("&[u64] should convert to &[u8]");
// NOTE(review): the replacement lines drop the `debug_assert!` on `index`, and `as usize`
// truncates silently where `try_into().unwrap()` would panic. An index beyond the offset
// table but still inside `store` now yields a clamped, arbitrary slice — confirm callers
// always pass a valid index.
262+
let index = index as usize;
263+
let bytes: &[u8] = bytemuck::cast_slice(store);
// Clamp so that `lower <= upper <= bytes.len()`; the range below can then never be out of bounds.
264+
let upper = (store[index + 1] as usize).min(bytes.len());
// `(x + 7) & !7` rounds the start offset up to the next multiple of 8.
265+
let lower = (((store[index] as usize) + 7) & !7).min(upper);
260266
&bytes[lower .. upper]
261267
}
262268

@@ -433,5 +439,35 @@ mod test {
433439
assert_eq!(column3.get(2*i+0), column2.get(2*i+0));
434440
assert_eq!(column3.get(2*i+1), column2.get(2*i+1));
435441
}
442+
443+
// Test from_byte_slices round-trip.
444+
let byte_vec: Vec<&[u8]> = column.borrow().as_bytes().map(|(_, bytes)| bytes).collect();
445+
let column4 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_byte_slices(&byte_vec);
446+
for i in 0..100 {
447+
assert_eq!(column.get(2*i+0), column4.get(2*i+0).copied().map_err(|e| *e));
448+
assert_eq!(column.get(2*i+1), column4.get(2*i+1).copied().map_err(|e| *e));
449+
}
450+
}
451+
452+
/// Test from_byte_slices for tuples.
///
/// Fills a tuple container with 50 rows, collects its byte slices, rebuilds the borrowed
/// form via `from_byte_slices`, and checks the first two fields of every row.
453+
#[test]
454+
fn from_byte_slices_tuple() {
455+
use crate::common::{Push, Index};
456+
use crate::{Borrow, AsBytes, FromBytes, ContainerOf};
457+
458+
let mut column: ContainerOf<(u64, String, Vec<u32>)> = Default::default();
// Row `i` is `(i, "hello {i}", [i; i])`, so the string and vector lengths vary per row.
459+
for i in 0..50u64 {
460+
column.push(&(i, format!("hello {i}"), vec![i as u32; i as usize]));
461+
}
462+
// Keep only the byte slices; the first component of each `as_bytes` item is discarded.
463+
let byte_vec: Vec<&[u8]> = column.borrow().as_bytes().map(|(_, bytes)| bytes).collect();
464+
type Borrowed<'a> = <ContainerOf<(u64, String, Vec<u32>)> as crate::Borrow>::Borrowed<'a>;
465+
let reconstructed = Borrowed::from_byte_slices(&byte_vec);
466+
for i in 0..50 {
// NOTE(review): the third field (`_c`, the `Vec<u32>` column) is bound but never asserted on.
467+
let (a, b, _c) = reconstructed.get(i);
468+
assert_eq!(*a, i as u64);
469+
assert_eq!(b, &*format!("hello {i}"));
470+
}
436471
}
472+
437473
}

src/lib.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,11 @@ pub mod common {
619619
/// Implementors of this trait almost certainly reference the lifetime `'a` themselves,
620620
/// unless they actively deserialize the bytes (vs sit on the slices, as if zero-copy).
621621
pub trait FromBytes<'a> {
622+
/// The number of byte slices this type consumes when reconstructed.
623+
///
624+
/// This enables `from_byte_slices`, which can index directly into a slice
625+
/// of byte slices rather than consuming from an iterator sequentially.
626+
const SLICE_COUNT: usize;
622627
/// Reconstructs `self` from a sequence of correctly aligned and sized byte slices.
623628
///
624629
/// The implementation is expected to consume the right number of items from the iterator,
@@ -631,6 +636,14 @@ pub mod common {
631636
/// they are inlined. A single non-inlined function on a tree of `from_bytes` calls
632637
/// can cause the performance to drop significantly.
633638
fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self;
639+
/// Reconstructs `self` from a slice of byte slices, using direct indexing.
640+
///
641+
/// The slice should contain exactly `Self::SLICE_COUNT` elements.
642+
/// This avoids the iterator chain overhead of `from_bytes`.
///
/// The provided default does not index directly: it forwards to `from_bytes` over a
/// copying iterator of `bytes`, so only implementations that override this method
/// bypass the iterator path.
643+
#[inline(always)]
644+
fn from_byte_slices(bytes: &[&'a [u8]]) -> Self where Self: Sized {
// Fallback: feed the slices, in order, to the iterator-based constructor.
645+
Self::from_bytes(&mut bytes.iter().copied())
646+
}
634647
}
635648

636649
}

0 commit comments

Comments
 (0)