Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use bencher::{benchmark_group, benchmark_main, Bencher};
use columnar::{Clear, Columnar};
use columnar::bytes::{EncodeDecode, Sequence};

/// Runs `_bench_copy` on a vector holding 1024 unit values.
fn empty_copy(bencher: &mut Bencher) {
    let records = vec![(); 1024];
    _bench_copy(bencher, records);
}
/// Runs `_bench_copy` on a vector holding 1024 copies of `None` typed as `Option<String>`.
fn option_copy(bencher: &mut Bencher) {
    let records: Vec<Option<String>> = vec![None; 1024];
    _bench_copy(bencher, records);
}
Expand Down Expand Up @@ -50,7 +51,7 @@ fn _bench_copy<T: Columnar+Eq>(bencher: &mut Bencher, record: T) where T::Contai
arena.push(&record);
}
use columnar::{AsBytes, Container};
bencher.bytes = 8 * arena.borrow().length_in_words() as u64;
bencher.bytes = Sequence::length_in_bytes(arena.borrow().as_bytes()) as u64;
arena.clear();

bencher.iter(|| {
Expand Down
11 changes: 6 additions & 5 deletions benches/serde.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use bencher::{benchmark_group, benchmark_main, Bencher};
use columnar::{Columnar, Container, Clear, AsBytes, FromBytes};
use columnar::bytes::{EncodeDecode, Sequence};
use serde::{Serialize, Deserialize};

fn goser_new(b: &mut Bencher) {
Expand All @@ -18,7 +19,7 @@ fn goser_push(b: &mut Bencher) {
container.push(&log);
}
let mut words = vec![];
::columnar::bytes::serialization::encode(&mut words, container.borrow().as_bytes());
Sequence::encode(&mut words, container.borrow().as_bytes());
b.bytes = 8 * words.len() as u64;
b.iter(|| {
container.clear();
Expand Down Expand Up @@ -49,11 +50,11 @@ fn goser_encode(b: &mut Bencher) {
container.push(&log);
}
let mut words = vec![];
::columnar::bytes::serialization::encode(&mut words, container.borrow().as_bytes());
Sequence::encode(&mut words, container.borrow().as_bytes());
b.bytes = 8 * words.len() as u64;
b.iter(|| {
words.clear();
::columnar::bytes::serialization::encode(&mut words, container.borrow().as_bytes());
Sequence::encode(&mut words, container.borrow().as_bytes());
bencher::black_box(&words);
});
}
Expand All @@ -66,10 +67,10 @@ fn goser_decode(b: &mut Bencher) {
for _ in 0..1024 {
container.push(&log);
}
::columnar::bytes::serialization::encode(&mut words, container.borrow().as_bytes());
Sequence::encode(&mut words, container.borrow().as_bytes());
b.bytes = 8 * words.len() as u64;
b.iter(|| {
let mut slices = ::columnar::bytes::serialization::decode(&mut words);
let mut slices = Sequence::decode(&mut words);
let foo = <<Log as Columnar>::Container as Container<Log>>::Borrowed::from_bytes(&mut slices);
bencher::black_box(foo);
});
Expand Down
42 changes: 37 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,6 @@ pub mod common {
pub trait AsBytes<'a> {
    /// Yields the byte slices that make up `self`, each paired with its required alignment.
    fn as_bytes(&self) -> impl Iterator<Item = (u64, &'a [u8])>;

    /// Counts the `u64` words needed to record `self` as aligned bytes.
    ///
    /// Every slice costs one word, plus one word per eight bytes of content,
    /// with a trailing partial word rounded up to a whole one.
    fn length_in_words(&self) -> usize {
        self.as_bytes()
            .map(|(_align, slice)| {
                let full_words = slice.len() / 8;
                let partial_word = usize::from(slice.len() % 8 != 0);
                1 + full_words + partial_word
            })
            .sum()
    }
}

/// A type that can be reconstituted from byte slices with lifetime `'a`.
Expand All @@ -479,11 +475,47 @@ pub mod common {
///
/// The methods here line up with the `AsBytes` and `FromBytes` traits.
pub mod bytes {

/// A matched pair of encoding and decoding routines for sequences of byte slices.
pub trait EncodeDecode {
    /// Number of `u64` words needed to hold the encoding of `bytes`.
    fn length_in_words<'a, I>(bytes: I) -> usize
    where
        I: Iterator<Item = (u64, &'a [u8])>;

    /// Number of `u8` bytes needed to hold the encoding of `bytes`.
    ///
    /// Offered for convenience and clarity: the default is eight times
    /// `Self::length_in_words`, and implementations should keep it that way.
    fn length_in_bytes<'a, I>(bytes: I) -> usize
    where
        I: Iterator<Item = (u64, &'a [u8])>,
    {
        let words = Self::length_in_words(bytes);
        8 * words
    }

    /// Appends the encoding of `bytes` to `store` as a sequence of `u64`.
    fn encode<'a, I>(store: &mut Vec<u64>, bytes: I)
    where
        I: Iterator<Item = (u64, &'a [u8])>;

    /// Emits `bytes` in the encoded format to an arbitrary writer.
    fn write<'a, I, W>(writer: W, bytes: I) -> std::io::Result<()>
    where
        I: Iterator<Item = (u64, &'a [u8])>,
        W: std::io::Write;

    /// Recovers the byte slices from a sequence of `u64`.
    fn decode<'a>(store: &'a [u64]) -> impl Iterator<Item = &'a [u8]>;
}

/// A sequential byte layout for `AsBytes` and `FromBytes` implementors.
///
/// The layout is aligned like a sequence of `u64`, where we repeatedly announce a length,
/// and then follow it by that many bytes. We may need to follow this with padding bytes.
pub mod serialization {
pub use serialization::Sequence;
mod serialization {

/// Encodes and decodes sequences of byte slices: each slice is written as its length followed by its bytes.
pub struct Sequence;

impl super::EncodeDecode for Sequence {
    /// Sums, over all slices, one `u64` for the length plus enough `u64`s to hold the slice's bytes.
    fn length_in_words<'a, I>(bytes: I) -> usize
    where
        I: Iterator<Item = (u64, &'a [u8])>,
    {
        bytes
            .map(|(_alignment, slice)| {
                // Round the payload up to a whole number of eight-byte words.
                let payload_words = (slice.len() + 7) / 8;
                1 + payload_words
            })
            .sum()
    }

    /// Forwards to this module's free function `encode`.
    fn encode<'a, I>(store: &mut Vec<u64>, bytes: I)
    where
        I: Iterator<Item = (u64, &'a [u8])>,
    {
        encode(store, bytes)
    }

    /// Forwards to this module's free function `write`.
    fn write<'a, I, W>(writer: W, bytes: I) -> std::io::Result<()>
    where
        I: Iterator<Item = (u64, &'a [u8])>,
        W: std::io::Write,
    {
        write(writer, bytes)
    }

    /// Forwards to this module's free function `decode`.
    fn decode<'a>(store: &'a [u64]) -> impl Iterator<Item = &'a [u8]> {
        decode(store)
    }
}

/// Encodes a sequence of byte slices as their length followed by their bytes, aligned to 8 bytes.
///
Expand Down