Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions columnar_derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,20 @@ fn derive_struct(name: &syn::Ident, generics: &syn::Generics, data_struct: syn::

quote! {
impl #impl_gen ::columnar::FromBytes<'columnar> for #c_ident #ty_gen #where_clause {
    // Total slice count is the sum over the field containers, in declaration order.
    const SLICE_COUNT: usize = 0 #(+ <#container_types>::SLICE_COUNT)*;
    // Rebuilds each field in declaration order by pulling slices from the shared iterator.
    #[inline(always)]
    fn from_bytes(bytes: &mut impl Iterator<Item=&'columnar [u8]>) -> Self {
        Self { #(#names: ::columnar::FromBytes::from_bytes(bytes),)* }
    }
    // Rebuilds each field from its own window of `bytes`, found by direct indexing.
    //
    // Fields are initialised inside the struct literal rather than through
    // `let #names = ...` bindings: a user field called `bytes` or `_offset` would
    // otherwise shadow the locals below and break every later field. Field
    // initialisers run in the order written, so `_offset` advances exactly as a
    // sequence of statements would.
    #[inline(always)]
    fn from_byte_slices(bytes: &[&'columnar [u8]]) -> Self {
        // Start of the next field's window within `bytes`.
        let mut _offset = 0;
        Self {
            #(
                #names: {
                    let _lower = _offset;
                    _offset += <#container_types>::SLICE_COUNT;
                    <#container_types>::from_byte_slices(&bytes[_lower .. _offset])
                },
            )*
        }
    }
}
}
};
Expand Down Expand Up @@ -500,10 +510,15 @@ fn derive_unit_struct(name: &syn::Ident, _generics: &syn::Generics, vis: syn::Vi
}

impl<'columnar> ::columnar::FromBytes<'columnar> for #c_ident <&'columnar u64> {
    // The container is a single `u64` count, stored in exactly one slice.
    const SLICE_COUNT: usize = 1;

    // Takes the next slice from the iterator and borrows its first `u64` as the count.
    #[inline(always)]
    fn from_bytes(bytes: &mut impl Iterator<Item=&'columnar [u8]>) -> Self {
        let raw: &'columnar [u8] = bytes.next().unwrap();
        let words: &'columnar [u64] = ::columnar::bytemuck::try_cast_slice(raw).unwrap();
        Self { count: &words[0] }
    }

    // Same as `from_bytes`, but reads the slice at position zero directly.
    #[inline(always)]
    fn from_byte_slices(bytes: &[&'columnar [u8]]) -> Self {
        let raw: &'columnar [u8] = bytes[0];
        let words: &'columnar [u64] = ::columnar::bytemuck::try_cast_slice(raw).unwrap();
        Self { count: &words[0] }
    }
}

impl ::columnar::Columnar for #name {
Expand Down Expand Up @@ -884,6 +899,7 @@ fn derive_enum(name: &syn::Ident, generics: &syn:: Generics, data_enum: syn::Dat
quote! {
#[allow(non_snake_case)]
impl #impl_gen ::columnar::FromBytes<'columnar> for #c_ident #ty_gen #where_clause {
const SLICE_COUNT: usize = 0 #(+ <#container_types>::SLICE_COUNT)* + CVar::SLICE_COUNT + COff::SLICE_COUNT;
#[inline(always)]
fn from_bytes(bytes: &mut impl Iterator<Item=&'columnar [u8]>) -> Self {
Self {
Expand All @@ -892,6 +908,18 @@ fn derive_enum(name: &syn::Ident, generics: &syn:: Generics, data_enum: syn::Dat
offset: ::columnar::FromBytes::from_bytes(bytes),
}
}
// Rebuilds the container from a slice of byte slices by direct indexing.
// Slices are handed out in order: first one window per interpolated
// `#names` container, then a window for `variant`, then the rest for `offset`.
#[inline(always)]
fn from_byte_slices(bytes: &[&'columnar [u8]]) -> Self {
// Start of the next component's window within `bytes`.
let mut _offset = 0;
#(
// Each container gets exactly its own `SLICE_COUNT` slices.
let #names = <#container_types>::from_byte_slices(&bytes[_offset .. _offset + <#container_types>::SLICE_COUNT]);
_offset += <#container_types>::SLICE_COUNT;
)*
let variant = CVar::from_byte_slices(&bytes[_offset .. _offset + CVar::SLICE_COUNT]);
_offset += CVar::SLICE_COUNT;
// NOTE(review): `offset` is given the whole remaining tail rather than a window of
// `COff::SLICE_COUNT` slices like the other components; presumably equivalent when
// the caller passes exactly `Self::SLICE_COUNT` slices. Confirm this is intended.
let offset = COff::from_byte_slices(&bytes[_offset ..]);
Self { #(#names,)* variant, offset }
}
}
}
};
Expand Down Expand Up @@ -1153,10 +1181,15 @@ fn derive_tags(name: &syn::Ident, _generics: &syn:: Generics, data_enum: syn::Da
}

impl<'columnar, CVar: ::columnar::FromBytes<'columnar>> ::columnar::FromBytes<'columnar> for #c_ident <CVar> {
    // The only stored component is `variant`, so its slice count is the whole count.
    const SLICE_COUNT: usize = CVar::SLICE_COUNT;

    // Delegates to the `variant` container, which consumes what it needs from the iterator.
    #[inline(always)]
    fn from_bytes(bytes: &mut impl Iterator<Item=&'columnar [u8]>) -> Self {
        let variant = <CVar as ::columnar::FromBytes<'columnar>>::from_bytes(bytes);
        Self { variant }
    }

    // Delegates to the `variant` container, passing the slices through untouched.
    #[inline(always)]
    fn from_byte_slices(bytes: &[&'columnar [u8]]) -> Self {
        let variant = <CVar as ::columnar::FromBytes<'columnar>>::from_byte_slices(bytes);
        Self { variant }
    }
}

impl ::columnar::Columnar for #name {
Expand Down
2 changes: 2 additions & 0 deletions src/arc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ impl<'a, T: AsBytes<'a>> AsBytes<'a> for Arc<T> {
#[inline(always)] fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> { self.as_ref().as_bytes() }
}
/// `Arc<T>` is reconstructed by rebuilding the inner `T` and wrapping it in a fresh `Arc`.
impl<'a, T: FromBytes<'a>> FromBytes<'a> for Arc<T> {
    /// The wrapper adds no slices of its own.
    const SLICE_COUNT: usize = T::SLICE_COUNT;

    #[inline(always)]
    fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
        let inner = T::from_bytes(bytes);
        Arc::new(inner)
    }

    #[inline(always)]
    fn from_byte_slices(bytes: &[&'a [u8]]) -> Self {
        let inner = T::from_byte_slices(bytes);
        Arc::new(inner)
    }
}

#[cfg(test)]
Expand Down
2 changes: 2 additions & 0 deletions src/boxed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ impl<'a, C: AsBytes<'a>> AsBytes<'a> for Boxed<C> {
#[inline(always)] fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> { self.0.as_bytes() }
}
impl<'a, C: FromBytes<'a>> FromBytes<'a> for Boxed<C> {
const SLICE_COUNT: usize = C::SLICE_COUNT;
#[inline(always)] fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self { Self(C::from_bytes(bytes)) }
#[inline(always)] fn from_byte_slices(bytes: &[&'a [u8]]) -> Self { Self(C::from_byte_slices(bytes)) }
}
impl<C: Index> Index for Boxed<C> {
type Ref = Boxed<C::Ref>;
Expand Down
52 changes: 44 additions & 8 deletions src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,20 +243,26 @@ pub mod serialization_neu {
}

/// Decodes an encoded sequence of byte slices. Each result will be `u64` aligned.
#[inline(always)]
pub fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
assert!(store[0] % 8 == 0);
let slices = (store[0] / 8) - 1;
(0 .. slices).map(|i| decode_index(store, i))
let slices = store[0] as usize / 8 - 1;
let index = &store[..slices + 1];
let last = index[slices] as usize;
let bytes: &[u8] = &bytemuck::cast_slice(store)[..last];
(0 .. slices).map(move |i| {
let upper = (index[i + 1] as usize).min(last);
let lower = (((index[i] as usize) + 7) & !7).min(upper);
&bytes[lower .. upper]
})
}

/// Decodes a specific byte slice by index. It will be `u64` aligned.
#[inline(always)]
pub fn decode_index(store: &[u64], index: u64) -> &[u8] {
debug_assert!(index + 1 < store[0]/8);
let index: usize = index.try_into().unwrap();
let lower: usize = ((store[index] + 7) & !7).try_into().unwrap();
let upper: usize = (store[index + 1]).try_into().unwrap();
let bytes: &[u8] = bytemuck::try_cast_slice(store).expect("&[u64] should convert to &[u8]");
let index = index as usize;
let bytes: &[u8] = bytemuck::cast_slice(store);
let upper = (store[index + 1] as usize).min(bytes.len());
let lower = (((store[index] as usize) + 7) & !7).min(upper);
&bytes[lower .. upper]
}

Expand Down Expand Up @@ -433,5 +439,35 @@ mod test {
assert_eq!(column3.get(2*i+0), column2.get(2*i+0));
assert_eq!(column3.get(2*i+1), column2.get(2*i+1));
}

// Test from_byte_slices round-trip.
let byte_vec: Vec<&[u8]> = column.borrow().as_bytes().map(|(_, bytes)| bytes).collect();
let column4 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_byte_slices(&byte_vec);
for i in 0..100 {
assert_eq!(column.get(2*i+0), column4.get(2*i+0).copied().map_err(|e| *e));
assert_eq!(column.get(2*i+1), column4.get(2*i+1).copied().map_err(|e| *e));
}
}

/// Round-trips a tuple container through `as_bytes` and `from_byte_slices`.
#[test]
fn from_byte_slices_tuple() {
    use crate::common::{Push, Index};
    use crate::{Borrow, AsBytes, FromBytes, ContainerOf};

    type Row = (u64, String, Vec<u32>);
    type Borrowed<'a> = <ContainerOf<Row> as crate::Borrow>::Borrowed<'a>;

    // Fill the container with 50 rows whose fields all derive from the row index.
    let mut column: ContainerOf<Row> = Default::default();
    for i in 0..50u64 {
        let row = (i, format!("hello {i}"), vec![i as u32; i as usize]);
        column.push(&row);
    }

    // Keep only the byte slices; the `u64` paired with each one is not needed here.
    let slices: Vec<&[u8]> = column.borrow().as_bytes().map(|(_, bytes)| bytes).collect();
    let rebuilt = Borrowed::from_byte_slices(&slices);

    // Only the first two tuple components are compared against the inputs.
    for i in 0..50 {
        let (number, text, _rest) = rebuilt.get(i);
        assert_eq!(*number, i as u64);
        assert_eq!(text, &*format!("hello {i}"));
    }
}

}
13 changes: 13 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,11 @@ pub mod common {
/// Implementors of this trait almost certainly reference the lifetime `'a` themselves,
/// unless they actively deserialize the bytes (vs sit on the slices, as if zero-copy).
pub trait FromBytes<'a> {
/// The number of byte slices this type consumes when reconstructed.
///
/// This enables `from_byte_slices`, which can index directly into a slice
/// of byte slices rather than consuming from an iterator sequentially.
const SLICE_COUNT: usize;
/// Reconstructs `self` from a sequence of correctly aligned and sized bytes slices.
///
/// The implementation is expected to consume the right number of items from the iterator,
Expand All @@ -631,6 +636,14 @@ pub mod common {
/// they are inlined. A single non-inlined function on a tree of `from_bytes` calls
/// can cause the performance to drop significantly.
fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self;
/// Rebuilds `Self` from `bytes`, a slice holding the byte slices of its components.
///
/// Callers are expected to supply exactly `Self::SLICE_COUNT` entries. This
/// default replays the entries through `from_bytes`; implementors can override
/// it to index into `bytes` directly and avoid the iterator chain.
#[inline(always)]
fn from_byte_slices(bytes: &[&'a [u8]]) -> Self where Self: Sized {
    let mut slices = bytes.iter().copied();
    Self::from_bytes(&mut slices)
}
}

}
Expand Down
Loading
Loading