Skip to content

Commit e79f411

Browse files
committed
Provide serialization functions for use with serde macros, rather than introducing a new type
1 parent b9c7bb7 commit e79f411

File tree

3 files changed

+85
-166
lines changed

3 files changed

+85
-166
lines changed

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@ license = "MIT"
77
edition = "2021"
88

99
[dependencies]
10-
ark-serialize = { version = "0.4", features = ["derive"] }
1110
base64 = "0.22"
12-
derive_more = "0.99"
1311
serde = "1.0"
1412

1513
[dev-dependencies]
1614
bincode = "1.3"
1715
rand = "0.8"
16+
serde = { version = "1.0", features = ["derive"] }
1817
serde_json = "1.0"

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,17 @@
22

33
_Binary blobs with intelligent serialization._
44

5-
This crate provides the `Base64Bytes` type. This type behaves like a `Vec<u8>` in almost all cases
6-
except serialization. Where `Vec<u8>` always serializes as an array of bytes, `Base64Bytes` tries to
7-
make an intelligent decision about how to serialize based on the serialization format.
5+
Where `Vec<u8>` always serializes as an array of bytes, this crate provides serialization functions
6+
which try to make an intelligent decision about how to serialize a byte vector based on the
7+
serialization format.
88

99
For binary formats like [`bincode`](https://crates.io/crates/bincode), the array-of-bytes
1010
serialization works great: it is compact and introduces very little overhead. But for human-readable
1111
formats such as [`json`](https://crates.io/crates/serde_json), it's far from ideal. The text encoding
1212
of an array introduces substantial overhead, and the resulting array of opaque bytes isn't
1313
particularly readable anyway.
1414

15-
`Base64Bytes` uses the [`is_human_readable`](https://docs.rs/serde/latest/serde/trait.Serializer.html#method.is_human_readable)
15+
`base64-bytes` uses the [`is_human_readable`](https://docs.rs/serde/latest/serde/trait.Serializer.html#method.is_human_readable)
1616
property of a serializer to distinguish these cases. For binary formats, it uses the default
1717
`Vec<u8>` serialization. For human-readable formats, it uses a much more compact and conventional
1818
base 64 encoding.

src/lib.rs

Lines changed: 80 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -1,194 +1,114 @@
11
//! Intelligent serialization for binary blobs.
2+
//!
3+
//! Where `Vec<u8>` always serializes as an array of bytes, this crate provides serialization
4+
//! functions which try to make an intelligent decision about how to serialize a byte vector based
5+
//! on the serialization format.
6+
//!
7+
//! For binary formats like [`bincode`](https://docs.rs/bincode/latest/bincode/), the array-of-bytes
8+
//! serialization works great: it is compact and introduces very little overhead. But for
9+
//! human-readable formats such as [`serde_json`](https://docs.rs/serde_json/latest/serde_json/), it's
10+
//! far from ideal. The text encoding of an array introduces substantial overhead, and the resulting
11+
//! array of opaque bytes isn't particularly readable anyway.
12+
//!
13+
//! `base64-bytes` uses the [`is_human_readable`](serde::Serializer::is_human_readable) property of
14+
//! a serializer to distinguish these cases. For binary formats, it uses the default `Vec<u8>`
15+
//! serialization. For human-readable formats, it uses a much more compact and conventional base 64
16+
//! encoding.
17+
//!
18+
//! # Usage
19+
//!
20+
//! The interface consists of [`serialize`] and [`deserialize`] functions. While these _can_ be
21+
//! called directly, they are intended to be used with serde's
22+
//! [field attributes](https://serde.rs/field-attrs.html) controlling serialization, like:
23+
//!
24+
//! ```
25+
//! use serde::{Deserialize, Serialize};
26+
//!
27+
//! #[derive(Deserialize, Serialize)]
28+
//! struct SomeType {
29+
//! #[serde(
30+
//! serialize_with = "base64_bytes::serialize",
31+
//! deserialize_with = "base64_bytes::deserialize",
32+
//! )]
33+
//! bytes: Vec<u8>,
34+
//! }
35+
//! ```
36+
//!
37+
//! Or, as a shorthand:
38+
//!
39+
//! ```
40+
//! use serde::{Deserialize, Serialize};
41+
//!
42+
//! #[derive(Deserialize, Serialize)]
43+
//! struct SomeType {
44+
//! #[serde(with = "base64_bytes")]
45+
//! bytes: Vec<u8>,
46+
//! }
47+
//! ```
48+
//!
249
3-
use ark_serialize::{CanonicalDeserialize, CanonicalSerialize};
450
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
5-
use derive_more::{From, Into};
651
use serde::{
752
de::{Deserialize, Deserializer, Error},
853
ser::{Serialize, Serializer},
954
};
10-
use std::{
11-
ops::{Deref, DerefMut},
12-
slice::SliceIndex,
13-
};
14-
15-
/// An unstructured byte array with smart serialization.
16-
///
17-
/// [`Base64Bytes`] mostly acts as a simple byte array, `Vec<u8>`. It can easily be converted to and
18-
/// from a `Vec<u8>`, and it implements many of the same traits as [`Vec`]. In fact internally it
19-
/// merely wraps a `Vec<u8>`.
20-
///
21-
/// The only difference is in how it serializes. `Vec<u8>` serializes very efficiently using
22-
/// `bincode`, but using `serde_json`, it serializes as a JSON array of integers, which is
23-
/// unconventional and inefficient. It is better, in JSON, to serialize binary data as a
24-
/// base64-encoded string. [`Base64Bytes`] uses the
25-
/// [`is_human_readable`](Serializer::is_human_readable) property of a [`Serializer`] to detect
26-
/// whether we are serializing for a compact binary format (like `bincode`) or a human-readable
27-
/// format (like JSON). In the former cases, it serializes directly as an array of bytes. In the
28-
/// latter case, it serializes as a string using base 64.
29-
#[derive(
30-
Clone,
31-
Debug,
32-
Default,
33-
PartialEq,
34-
Eq,
35-
Hash,
36-
PartialOrd,
37-
Ord,
38-
From,
39-
Into,
40-
CanonicalSerialize,
41-
CanonicalDeserialize,
42-
)]
43-
pub struct Base64Bytes(Vec<u8>);
44-
45-
impl Base64Bytes {
46-
pub fn get<I>(&self, index: I) -> Option<&I::Output>
47-
where
48-
I: SliceIndex<[u8]>,
49-
{
50-
self.0.get(index)
51-
}
52-
}
53-
54-
impl Serialize for Base64Bytes {
55-
fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
56-
if s.is_human_readable() {
57-
BASE64.encode(self).serialize(s)
58-
} else {
59-
self.0.serialize(s)
60-
}
61-
}
62-
}
63-
64-
impl<'a> Deserialize<'a> for Base64Bytes {
65-
fn deserialize<D: Deserializer<'a>>(d: D) -> Result<Self, D::Error> {
66-
if d.is_human_readable() {
67-
Ok(Self(BASE64.decode(String::deserialize(d)?).map_err(
68-
|err| D::Error::custom(format!("invalid base64: {err}")),
69-
)?))
70-
} else {
71-
Ok(Self(Vec::deserialize(d)?))
72-
}
73-
}
74-
}
75-
76-
impl From<&[u8]> for Base64Bytes {
77-
fn from(bytes: &[u8]) -> Self {
78-
Self(bytes.into())
79-
}
80-
}
81-
82-
impl<const N: usize> From<[u8; N]> for Base64Bytes {
83-
fn from(bytes: [u8; N]) -> Self {
84-
Self(bytes.into())
85-
}
86-
}
87-
88-
impl<const N: usize> From<&[u8; N]> for Base64Bytes {
89-
fn from(bytes: &[u8; N]) -> Self {
90-
Self(bytes.into())
91-
}
92-
}
93-
94-
impl FromIterator<u8> for Base64Bytes {
95-
fn from_iter<I: IntoIterator<Item = u8>>(iter: I) -> Self {
96-
Self(iter.into_iter().collect())
97-
}
98-
}
9955

100-
impl AsRef<[u8]> for Base64Bytes {
101-
fn as_ref(&self) -> &[u8] {
102-
self.0.as_ref()
56+
/// Serialize a byte vector.
57+
pub fn serialize<S: Serializer, T: AsRef<[u8]>>(v: &T, s: S) -> Result<S::Ok, S::Error> {
58+
if s.is_human_readable() {
59+
BASE64.encode(v).serialize(s)
60+
} else {
61+
v.as_ref().serialize(s)
10362
}
10463
}
10564

106-
impl AsMut<[u8]> for Base64Bytes {
107-
fn as_mut(&mut self) -> &mut [u8] {
108-
self.0.as_mut()
109-
}
110-
}
111-
112-
impl Deref for Base64Bytes {
113-
type Target = [u8];
114-
115-
fn deref(&self) -> &[u8] {
116-
self.as_ref()
117-
}
118-
}
119-
120-
impl DerefMut for Base64Bytes {
121-
fn deref_mut(&mut self) -> &mut [u8] {
122-
self.as_mut()
123-
}
124-
}
125-
126-
impl PartialEq<[u8]> for Base64Bytes {
127-
fn eq(&self, other: &[u8]) -> bool {
128-
self.as_ref() == other
129-
}
130-
}
131-
132-
impl<const N: usize> PartialEq<[u8; N]> for Base64Bytes {
133-
fn eq(&self, other: &[u8; N]) -> bool {
134-
self.as_ref() == other
135-
}
136-
}
137-
138-
impl PartialEq<Vec<u8>> for Base64Bytes {
139-
fn eq(&self, other: &Vec<u8>) -> bool {
140-
self.0 == *other
141-
}
142-
}
143-
144-
impl<T> Extend<T> for Base64Bytes
145-
where
146-
Vec<u8>: Extend<T>,
147-
{
148-
fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
149-
self.0.extend(iter);
150-
}
151-
}
152-
153-
/// Create a [`Base64Bytes`] blob from elements.
154-
///
155-
/// This macro works exactly like the standard `vec![...]` macro.
156-
#[macro_export]
157-
macro_rules! base64_bytes {
158-
[$($elem:expr),* $(,)?] => {
159-
$crate::Base64Bytes::from(vec![$($elem),*])
160-
};
161-
[$elem:expr; $size:expr] => {
162-
$crate::Base64Bytes::from(vec![$elem; $size])
65+
/// Deserialize a byte vector.
66+
pub fn deserialize<'a, D: Deserializer<'a>>(d: D) -> Result<Vec<u8>, D::Error> {
67+
if d.is_human_readable() {
68+
Ok(BASE64
69+
.decode(String::deserialize(d)?)
70+
.map_err(|err| D::Error::custom(format!("invalid base64: {err}")))?)
71+
} else {
72+
Ok(Vec::deserialize(d)?)
16373
}
16474
}
16575

16676
#[cfg(test)]
16777
mod test {
168-
use super::*;
78+
use crate::BASE64;
79+
use base64::Engine;
16980
use rand::RngCore;
81+
use serde::{Deserialize, Serialize};
82+
83+
#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)]
84+
struct Test {
85+
#[serde(with = "crate")]
86+
bytes: Vec<u8>,
87+
}
17088

17189
#[test]
17290
fn test_bytes_serde() {
17391
let mut rng = rand::thread_rng();
17492

17593
for len in [0, 1, 10, 1000] {
176-
let mut bytes = base64_bytes![0; len];
177-
rng.fill_bytes(&mut bytes);
94+
let mut t = Test {
95+
bytes: vec![0; len],
96+
};
97+
rng.fill_bytes(&mut t.bytes);
17898

17999
// The binary serialization should be highly efficient: just the length followed by the
180100
// raw bytes.
181-
let binary = bincode::serialize(&bytes).unwrap();
101+
let binary = bincode::serialize(&t).unwrap();
182102
assert_eq!(binary[..8], (len as u64).to_le_bytes());
183-
assert_eq!(bytes, binary[8..]);
103+
assert_eq!(t.bytes, binary[8..]);
184104
// Check deserialization.
185-
assert_eq!(bytes, bincode::deserialize::<Base64Bytes>(&binary).unwrap());
105+
assert_eq!(t, bincode::deserialize::<Test>(&binary).unwrap());
186106

187107
// The JSON serialization should return a base 64 string.
188-
let json = serde_json::to_value(&bytes).unwrap();
189-
assert_eq!(json.as_str().unwrap(), BASE64.encode(&bytes));
108+
let json = serde_json::to_value(&t).unwrap();
109+
assert_eq!(json["bytes"].as_str().unwrap(), BASE64.encode(&t.bytes));
190110
// Check deserialization.
191-
assert_eq!(bytes, serde_json::from_value::<Base64Bytes>(json).unwrap());
111+
assert_eq!(t, serde_json::from_value::<Test>(json).unwrap());
192112
}
193113
}
194114
}

0 commit comments

Comments
 (0)