Skip to content

Commit c698dbd

Browse files
committed
allow choosing return type of BytesEncode
- allow any type that holds bytes instead of just `Cow<'_, [u8]>` - allow any type that holds an error instead of just `BoxedError` - add hint to use writer instead of returning bytes (zero_copy method), returns BoxedError for now - hint to using writers for serde serialization types
1 parent 5a10a00 commit c698dbd

File tree

13 files changed

+370
-178
lines changed

13 files changed

+370
-178
lines changed

heed-traits/src/lib.rs

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
1010
#![warn(missing_docs)]
1111

12-
use std::borrow::Cow;
1312
use std::cmp::{Ord, Ordering};
1413
use std::error::Error as StdError;
14+
use std::io;
1515

1616
/// A boxed `Send + Sync + 'static` error.
1717
pub type BoxedError = Box<dyn StdError + Send + Sync + 'static>;
@@ -21,8 +21,48 @@ pub trait BytesEncode<'a> {
2121
/// The type to encode.
2222
type EItem: ?Sized + 'a;
2323

24+
/// The type containing the encoded bytes.
25+
type ReturnBytes: Into<Vec<u8>> + AsRef<[u8]> + 'a;
26+
27+
/// The error type to return when encoding goes wrong.
28+
type Error: StdError + Send + Sync + 'static;
29+
30+
/// This function can be used to hint callers of the
31+
/// [`bytes_encode`][BytesEncode::bytes_encode] function to use
32+
/// [`bytes_encode_into_writer`][BytesEncode::bytes_encode_into_writer] instead, if the latter
33+
/// runs faster (for example if it needs fewer heap allocations).
34+
///
35+
/// The default implementation returns `true` because the default implementation of
36+
/// [`bytes_encode_into_writer`][BytesEncode::bytes_encode_into_writer] forwards to
37+
/// [`bytes_encode`][BytesEncode::bytes_encode].
38+
fn zero_copy(item: &Self::EItem) -> bool {
39+
// This is preferred to renaming the function parameter (to _item) because IDEs can
40+
// autofill trait implementations, which will default the parameter name to _item then and
41+
// this could probably also mess with clippy's renamed_function_params lint.
42+
let _ = item;
43+
44+
true
45+
}
46+
2447
/// Encode the given item as bytes.
25-
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError>;
48+
fn bytes_encode(item: &'a Self::EItem) -> Result<Self::ReturnBytes, Self::Error>;
49+
50+
/// Encode the given item as bytes and write it into the writer.
51+
///
52+
/// When implementing this, also take a look at [`zero_copy`][BytesEncode::zero_copy]'s
53+
/// documentation.
54+
///
55+
/// The default implementation forwards to [`bytes_encode`][BytesEncode::bytes_encode].
56+
fn bytes_encode_into_writer<W: io::Write>(
57+
item: &'a Self::EItem,
58+
mut writer: W,
59+
) -> Result<(), BoxedError> {
60+
let bytes = Self::bytes_encode(item)?;
61+
62+
writer.write_all(bytes.as_ref())?;
63+
64+
Ok(())
65+
}
2666
}
2767

2868
/// A trait that represents a decoding structure.

heed-types/src/bytes.rs

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::borrow::Cow;
1+
use std::convert::Infallible;
22

33
use heed_traits::{BoxedError, BytesDecode, BytesEncode};
44

@@ -11,8 +11,12 @@ pub enum Bytes {}
1111
impl<'a> BytesEncode<'a> for Bytes {
1212
type EItem = [u8];
1313

14-
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
15-
Ok(Cow::Borrowed(item))
14+
type ReturnBytes = &'a [u8];
15+
16+
type Error = Infallible;
17+
18+
fn bytes_encode(item: &'a Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
19+
Ok(item)
1620
}
1721
}
1822

@@ -23,3 +27,26 @@ impl<'a> BytesDecode<'a> for Bytes {
2327
Ok(bytes)
2428
}
2529
}
30+
31+
/// Like [`Bytes`], but always contains exactly `N` (the generic parameter) bytes.
32+
pub enum FixedSizeBytes<const N: usize> {}
33+
34+
impl<'a, const N: usize> BytesEncode<'a> for FixedSizeBytes<N> {
35+
type EItem = [u8; N];
36+
37+
type ReturnBytes = &'a [u8; N];
38+
39+
type Error = Infallible;
40+
41+
fn bytes_encode(item: &'a Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
42+
Ok(item)
43+
}
44+
}
45+
46+
impl<'a, const N: usize> BytesDecode<'a> for FixedSizeBytes<N> {
47+
type DItem = &'a [u8; N];
48+
49+
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
50+
bytes.try_into().map_err(Into::into)
51+
}
52+
}

heed-types/src/integer.rs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::borrow::Cow;
1+
use std::convert::Infallible;
22
use std::marker::PhantomData;
33
use std::mem::size_of;
44

@@ -11,8 +11,12 @@ pub struct U8;
1111
impl BytesEncode<'_> for U8 {
1212
type EItem = u8;
1313

14-
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
15-
Ok(Cow::from([*item].to_vec()))
14+
type ReturnBytes = [u8; 1];
15+
16+
type Error = Infallible;
17+
18+
fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
19+
Ok([*item])
1620
}
1721
}
1822

@@ -30,8 +34,12 @@ pub struct I8;
3034
impl BytesEncode<'_> for I8 {
3135
type EItem = i8;
3236

33-
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
34-
Ok(Cow::from([*item as u8].to_vec()))
37+
type ReturnBytes = [u8; 1];
38+
39+
type Error = Infallible;
40+
41+
fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
42+
Ok([*item as u8])
3543
}
3644
}
3745

@@ -54,10 +62,14 @@ macro_rules! define_type {
5462
impl<O: ByteOrder> BytesEncode<'_> for $name<O> {
5563
type EItem = $native;
5664

57-
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
58-
let mut buf = vec![0; size_of::<Self::EItem>()];
65+
type ReturnBytes = [u8; size_of::<$native>()];
66+
67+
type Error = Infallible;
68+
69+
fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
70+
let mut buf = [0; size_of::<$native>()];
5971
O::$write_method(&mut buf, *item);
60-
Ok(Cow::from(buf))
72+
Ok(buf)
6173
}
6274
}
6375

heed-types/src/serde_bincode.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::borrow::Cow;
2-
31
use heed_traits::{BoxedError, BytesDecode, BytesEncode};
42
use serde::{Deserialize, Serialize};
53

@@ -14,8 +12,24 @@ where
1412
{
1513
type EItem = T;
1614

17-
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
18-
bincode::serialize(item).map(Cow::Owned).map_err(Into::into)
15+
type ReturnBytes = Vec<u8>;
16+
17+
type Error = bincode::Error;
18+
19+
fn zero_copy(_item: &Self::EItem) -> bool {
20+
false
21+
}
22+
23+
fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
24+
bincode::serialize(item)
25+
}
26+
27+
fn bytes_encode_into_writer<W: std::io::Write>(
28+
item: &'a Self::EItem,
29+
writer: W,
30+
) -> Result<(), BoxedError> {
31+
bincode::serialize_into(writer, item)?;
32+
Ok(())
1933
}
2034
}
2135

heed-types/src/serde_json.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::borrow::Cow;
2-
31
use heed_traits::{BoxedError, BytesDecode, BytesEncode};
42
use serde::{Deserialize, Serialize};
53

@@ -14,8 +12,24 @@ where
1412
{
1513
type EItem = T;
1614

17-
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
18-
serde_json::to_vec(item).map(Cow::Owned).map_err(Into::into)
15+
type ReturnBytes = Vec<u8>;
16+
17+
type Error = serde_json::Error;
18+
19+
fn zero_copy(_item: &Self::EItem) -> bool {
20+
false
21+
}
22+
23+
fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
24+
serde_json::to_vec(item)
25+
}
26+
27+
fn bytes_encode_into_writer<W: std::io::Write>(
28+
item: &'a Self::EItem,
29+
writer: W,
30+
) -> Result<(), BoxedError> {
31+
serde_json::to_writer(writer, item)?;
32+
Ok(())
1933
}
2034
}
2135

heed-types/src/serde_rmp.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::borrow::Cow;
2-
31
use heed_traits::{BoxedError, BytesDecode, BytesEncode};
42
use serde::{Deserialize, Serialize};
53

@@ -14,8 +12,24 @@ where
1412
{
1513
type EItem = T;
1614

17-
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
18-
rmp_serde::to_vec(item).map(Cow::Owned).map_err(Into::into)
15+
type ReturnBytes = Vec<u8>;
16+
17+
type Error = rmp_serde::encode::Error;
18+
19+
fn zero_copy(_item: &Self::EItem) -> bool {
20+
false
21+
}
22+
23+
fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
24+
rmp_serde::to_vec(item)
25+
}
26+
27+
fn bytes_encode_into_writer<W: std::io::Write>(
28+
item: &'a Self::EItem,
29+
mut writer: W,
30+
) -> Result<(), BoxedError> {
31+
rmp_serde::encode::write(&mut writer, item)?;
32+
Ok(())
1933
}
2034
}
2135

heed-types/src/str.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,26 @@
1-
use std::borrow::Cow;
2-
use std::str;
1+
use std::convert::Infallible;
32

43
use heed_traits::{BoxedError, BytesDecode, BytesEncode};
54

6-
/// Describes a [`prim@str`].
5+
/// Describes a [`str`].
76
pub enum Str {}
87

9-
impl BytesEncode<'_> for Str {
8+
impl<'a> BytesEncode<'a> for Str {
109
type EItem = str;
1110

12-
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
13-
Ok(Cow::Borrowed(item.as_bytes()))
11+
type ReturnBytes = &'a [u8];
12+
13+
type Error = Infallible;
14+
15+
fn bytes_encode(item: &'a Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
16+
Ok(item.as_bytes())
1417
}
1518
}
1619

1720
impl<'a> BytesDecode<'a> for Str {
1821
type DItem = &'a str;
1922

2023
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
21-
str::from_utf8(bytes).map_err(Into::into)
24+
std::str::from_utf8(bytes).map_err(Into::into)
2225
}
2326
}

heed-types/src/unit.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::borrow::Cow;
1+
use std::convert::Infallible;
22
use std::{error, fmt};
33

44
use heed_traits::{BoxedError, BytesDecode, BytesEncode};
@@ -9,8 +9,12 @@ pub enum Unit {}
99
impl BytesEncode<'_> for Unit {
1010
type EItem = ();
1111

12-
fn bytes_encode(_item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
13-
Ok(Cow::Borrowed(&[]))
12+
type ReturnBytes = [u8; 0];
13+
14+
type Error = Infallible;
15+
16+
fn bytes_encode(&(): &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
17+
Ok([])
1418
}
1519
}
1620

heed/src/cookbook.rs

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -144,15 +144,15 @@
144144
//! to create codecs to encode prefixes when possible instead of using a slice of bytes.
145145
//!
146146
//! ```
147-
//! use std::borrow::Cow;
147+
//! use std::convert::Infallible;
148148
//! use std::error::Error;
149149
//! use std::fs;
150150
//! use std::path::Path;
151151
//!
152152
//! use heed::types::*;
153153
//! use heed::{BoxedError, BytesDecode, BytesEncode, Database, EnvOpenOptions};
154154
//!
155-
//! #[derive(Debug, PartialEq, Eq)]
155+
//! #[derive(Debug, Clone, Copy, PartialEq, Eq)]
156156
//! pub enum Level {
157157
//! Debug,
158158
//! Warn,
@@ -170,18 +170,20 @@
170170
//! impl<'a> BytesEncode<'a> for LogKeyCodec {
171171
//! type EItem = LogKey;
172172
//!
173+
//! type ReturnBytes = [u8; 5];
174+
//!
175+
//! type Error = Infallible;
176+
//!
173177
//! /// Encodes the u32 timestamp in big endian followed by the log level with a single byte.
174-
//! fn bytes_encode(log: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
175-
//! let (timestamp_bytes, level_byte) = match log {
176-
//! LogKey { timestamp, level: Level::Debug } => (timestamp.to_be_bytes(), 0),
177-
//! LogKey { timestamp, level: Level::Warn } => (timestamp.to_be_bytes(), 1),
178-
//! LogKey { timestamp, level: Level::Error } => (timestamp.to_be_bytes(), 2),
179-
//! };
178+
//! fn bytes_encode(log: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
179+
//! let mut output = [0; 5];
180180
//!
181-
//! let mut output = Vec::new();
182-
//! output.extend_from_slice(&timestamp_bytes);
183-
//! output.push(level_byte);
184-
//! Ok(Cow::Owned(output))
181+
//! let [timestamp @ .., level] = &mut output;
182+
//!
183+
//! *timestamp = log.timestamp.to_be_bytes();
184+
//! *level = log.level as u8;
185+
//!
186+
//! Ok(output)
185187
//! }
186188
//! }
187189
//!
@@ -216,9 +218,14 @@
216218
//! impl<'a> BytesEncode<'a> for LogAtHalfTimestampCodec {
217219
//! type EItem = u32;
218220
//!
221+
//! type ReturnBytes = [u8; 2];
222+
//!
223+
//! type Error = Infallible;
224+
//!
219225
//! /// This method encodes only the prefix of the keys in this particular case, the timestamp.
220-
//! fn bytes_encode(half_timestamp: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
221-
//! Ok(Cow::Owned(half_timestamp.to_be_bytes()[..2].to_vec()))
226+
//! fn bytes_encode(half_timestamp: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
227+
//! let [bytes @ .., _, _] = half_timestamp.to_be_bytes();
228+
//! Ok(bytes)
222229
//! }
223230
//! }
224231
//!

0 commit comments

Comments
 (0)