Skip to content

Commit a9cdc3a

Browse files
committed
Deserialize non-byte strings into raw buffers
Allow deserialization of non-byte string values into raw byte buffers. In cases where a value is a non-byte string, a byte buffer can be used to capture the raw encoded value. For instance, assuming a dictionary with an `info` key which has a dictionary value: ``` struct Metainfo { info: ByteBuf, } ``` could be used to capture the raw bytes of the encoded `info` dictionary value. For untrusted input, verify that the original value really had the expected type (e.g. a dictionary) and was not simply a byte string whose contents happen to be the raw encoded value.
1 parent c511b2d commit a9cdc3a

File tree

2 files changed

+181
-0
lines changed

2 files changed

+181
-0
lines changed

CHANGELOG.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,27 @@
11
# CHANGELOG
22

3+
## Unreleased
4+
5+
### Added
6+
7+
* Allow deserialization of non-byte string values into raw byte buffers. In
8+
cases where a value is a non-byte string, a byte buffer can be used to capture
9+
the raw encoded value. For instance, assuming a dictionary with an `info`
10+
key which has a dictionary value:
11+
12+
```
13+
#[derive(Deserialize)]
14+
struct Metainfo {
15+
info: ByteBuf,
16+
}
17+
```
18+
19+
could be used to capture the raw bytes of the encoded `info` dictionary value.
20+
21+
For untrusted input, the value should be verified as having the correct type
22+
(e.g. a dictionary) instead of a byte string which contains the raw encoded
23+
value.
24+
325
## v0.3.0
426

527
### Added

src/de.rs

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,97 @@ where
181181
}
182182
Ok(buf)
183183
}
184+
185+
fn capture_byte_string_len(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
186+
let mut len_buf = Vec::new();
187+
loop {
188+
match self.parse_next()? {
189+
b':' => {
190+
let len = String::from_utf8(len_buf.clone())?.parse()?;
191+
buf.extend(len_buf);
192+
buf.push(b':');
193+
return Ok(len);
194+
}
195+
n @ b'0'..=b'9' => len_buf.push(n),
196+
_ => return Err(Error::InvalidByteStrLen),
197+
}
198+
}
199+
}
200+
201+
fn capture_byte_string(&mut self, buf: &mut Vec<u8>) -> Result<()> {
202+
let len = self.capture_byte_string_len(buf)?;
203+
buf.reserve(len);
204+
for _ in 0..len {
205+
buf.push(self.parse_next()?);
206+
}
207+
Ok(())
208+
}
209+
210+
fn capture_integer(&mut self, buf: &mut Vec<u8>) -> Result<()> {
211+
buf.push(self.parse_next()?);
212+
213+
match self.parse_peek()? {
214+
b'-' => buf.push(self.parse_next()?),
215+
b'0'..=b'9' => {}
216+
_ => return Err(Error::InvalidInteger),
217+
}
218+
219+
loop {
220+
match self.parse_next()? {
221+
b'e' => {
222+
buf.push(b'e');
223+
return Ok(());
224+
}
225+
n @ b'0'..=b'9' => buf.push(n),
226+
_ => return Err(Error::InvalidInteger),
227+
}
228+
}
229+
}
230+
231+
fn capture_list(&mut self, buf: &mut Vec<u8>) -> Result<()> {
232+
buf.push(self.parse_next()?);
233+
234+
loop {
235+
match self.parse_peek()? {
236+
b'e' => {
237+
buf.push(self.parse_next()?);
238+
return Ok(());
239+
}
240+
b'0'..=b'9' => self.capture_byte_string(buf)?,
241+
b'i' => self.capture_integer(buf)?,
242+
b'l' => self.capture_list(buf)?,
243+
b'd' => self.capture_dict(buf)?,
244+
_ => return Err(Error::InvalidDict),
245+
}
246+
}
247+
}
248+
249+
fn capture_dict(&mut self, buf: &mut Vec<u8>) -> Result<()> {
250+
buf.push(self.parse_next()?);
251+
252+
loop {
253+
match self.parse_peek()? {
254+
b'0'..=b'9' => self.capture_byte_string(buf)?,
255+
b'e' => {
256+
buf.push(self.parse_next()?);
257+
return Ok(());
258+
}
259+
_ => {
260+
return Err(Error::InvalidDict);
261+
}
262+
}
263+
264+
match self.parse_peek()? {
265+
b'0'..=b'9' => self.capture_byte_string(buf)?,
266+
b'i' => self.capture_integer(buf)?,
267+
b'l' => self.capture_list(buf)?,
268+
b'd' => self.capture_dict(buf)?,
269+
_ => {
270+
return Err(Error::InvalidDict);
271+
}
272+
}
273+
}
274+
}
184275
}
185276

186277
#[cfg(feature = "std")]
@@ -366,6 +457,21 @@ impl<'de, 'a, R: Read<'de>> de::Deserializer<'de> for &'a mut Deserializer<R> {
366457
let bytes = self.parse_bytes()?;
367458
visitor.visit_byte_buf(bytes)
368459
}
460+
b'i' => {
461+
let mut bytes = Vec::new();
462+
self.capture_integer(&mut bytes)?;
463+
visitor.visit_byte_buf(bytes)
464+
}
465+
b'l' => {
466+
let mut bytes = Vec::new();
467+
self.capture_list(&mut bytes)?;
468+
visitor.visit_byte_buf(bytes)
469+
}
470+
b'd' => {
471+
let mut bytes = Vec::new();
472+
self.capture_dict(&mut bytes)?;
473+
visitor.visit_byte_buf(bytes)
474+
}
369475
_ => Err(self.unexpected_type_err(&visitor)?),
370476
}
371477
}
@@ -586,6 +692,7 @@ where
586692
#[cfg(test)]
587693
mod tests {
588694
use super::*;
695+
use serde_bytes::ByteBuf;
589696
use serde_derive::Deserialize;
590697

591698
#[cfg(all(feature = "alloc", not(feature = "std")))]
@@ -676,4 +783,56 @@ mod tests {
676783
assert_eq!(s, expected);
677784
Ok(())
678785
}
786+
787+
/// An encoded integer deserialized into a `ByteBuf` yields its raw encoding.
#[test]
fn test_deserialize_integer_as_raw_bytes() -> Result<()> {
    #[derive(Debug, PartialEq, Deserialize)]
    struct S(ByteBuf);

    let raw = "i-1234e".as_bytes();
    let actual: S = from_slice(raw)?;
    assert_eq!(actual, S(ByteBuf::from(raw.to_vec())));
    Ok(())
}
798+
799+
/// An encoded list deserialized into a `ByteBuf` yields its raw encoding.
#[test]
fn test_deserialize_list_as_raw_bytes() -> Result<()> {
    #[derive(Debug, PartialEq, Deserialize)]
    struct S(ByteBuf);

    let raw = "l4:spam4:eggse".as_bytes();
    let actual: S = from_slice(raw)?;
    assert_eq!(actual, S(ByteBuf::from(raw.to_vec())));
    Ok(())
}
810+
811+
/// A dictionary-valued field deserialized into a `ByteBuf` yields the raw
/// encoding of just that nested dictionary.
#[test]
fn test_deserialize_map_value_as_raw_bytes() -> Result<()> {
    #[derive(Debug, PartialEq, Deserialize)]
    struct S {
        spam: ByteBuf,
    }

    let raw = "d4:spamd1:a1:bee".as_bytes();
    let actual: S = from_slice(raw)?;
    let nested = b"d1:a1:be".to_vec();
    assert_eq!(
        actual,
        S {
            spam: ByteBuf::from(nested),
        }
    );
    Ok(())
}
826+
827+
/// An encoded dictionary deserialized into a `ByteBuf` yields its raw encoding.
#[test]
fn test_deserialize_map_as_raw_bytes() -> Result<()> {
    #[derive(Debug, PartialEq, Deserialize)]
    struct S(ByteBuf);

    let raw = "d4:spamd1:a1:bee".as_bytes();
    let actual: S = from_slice(raw)?;
    assert_eq!(actual, S(ByteBuf::from(raw.to_vec())));
    Ok(())
}
679838
}

0 commit comments

Comments
 (0)