Skip to content

Commit 4819853

Browse files
authored
RUST-454 Implement error case BSON corpus tests (#181)
1 parent 7921a71 commit 4819853

File tree

14 files changed

+916
-127
lines changed

14 files changed

+916
-127
lines changed

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ byteorder = "1"
2828
chrono = "0.4"
2929
libc = "0.2"
3030
rand = "0.7"
31-
serde = "1.0"
31+
serde = { version = "1.0", features = ["derive"] }
3232
serde_json = { version = "1.0", features = ["preserve_order"] }
3333
time = "0.1"
3434
linked-hash-map = "0.5"
@@ -39,7 +39,6 @@ base64 = "0.12.1"
3939

4040
[dev-dependencies]
4141
assert_matches = "1.2"
42-
serde_derive = "1.0"
4342
serde_bytes = "0.11"
4443
pretty_assertions = "0.6.1"
4544

src/bson.rs

Lines changed: 40 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -203,6 +203,12 @@ impl From<Binary> for Bson {
203203
}
204204
}
205205

206+
impl From<Timestamp> for Bson {
207+
fn from(ts: Timestamp) -> Bson {
208+
Bson::Timestamp(ts)
209+
}
210+
}
211+
206212
impl<T> From<&T> for Bson
207213
where
208214
T: Clone + Into<Bson>,
@@ -294,32 +300,7 @@ impl From<DbPointer> for Bson {
294300
}
295301
}
296302

297-
impl From<Value> for Bson {
298-
fn from(a: Value) -> Bson {
299-
match a {
300-
Value::Number(x) => x
301-
.as_i64()
302-
.map(|i| {
303-
if i >= i32::MIN as i64 && i <= i32::MAX as i64 {
304-
Bson::Int32(i as i32)
305-
} else {
306-
Bson::Int64(i)
307-
}
308-
})
309-
.or_else(|| x.as_u64().map(Bson::from))
310-
.or_else(|| x.as_f64().map(Bson::from))
311-
.unwrap_or_else(|| panic!("Invalid number value: {}", x)),
312-
Value::String(x) => x.into(),
313-
Value::Bool(x) => x.into(),
314-
Value::Array(x) => Bson::Array(x.into_iter().map(Bson::from).collect()),
315-
Value::Object(x) => Bson::from_extended_document(
316-
x.into_iter().map(|(k, v)| (k, Bson::from(v))).collect(),
317-
),
318-
Value::Null => Bson::Null,
319-
}
320-
}
321-
}
322-
303+
/// This will create the [relaxed Extended JSON v2](https://docs.mongodb.com/manual/reference/mongodb-extended-json/) representation of the provided [`Bson`](../enum.Bson.html).
323304
impl From<Bson> for Value {
324305
fn from(bson: Bson) -> Self {
325306
bson.into_relaxed_extjson()
@@ -754,31 +735,7 @@ impl Bson {
754735

755736
["$date"] => {
756737
if let Ok(date) = doc.get_i64("$date") {
757-
let mut num_secs = date / 1000;
758-
let mut num_millis = date % 1000;
759-
760-
// The chrono API only lets us create a DateTime with an i64 number of seconds
761-
// and a u32 number of nanoseconds. In the case of a negative timestamp, this
762-
// means that we need to turn the negative fractional part into a positive and
763-
// shift the number of seconds down. For example:
764-
//
765-
// date = -4300 ms
766-
// num_secs = date / 1000 = -4300 / 1000 = -4
767-
// num_millis = date % 1000 = -4300 % 1000 = -300
768-
//
769-
// Since num_millis is less than 0:
770-
// num_secs = num_secs -1 = -4 - 1 = -5
771-
// num_millis = num_millis + 1000 = -300 + 1000 = 700
772-
//
773-
// Instead of -4 seconds and -300 milliseconds, we now have -5 seconds and +700
774-
// milliseconds, which expresses the same timestamp, but in a way we can create
775-
// a DateTime with.
776-
if num_millis < 0 {
777-
num_secs -= 1;
778-
num_millis += 1000;
779-
};
780-
781-
return Bson::DateTime(Utc.timestamp(num_secs, num_millis as u32 * 1_000_000));
738+
return Bson::DateTime(DateTime::from_i64(date).into());
782739
}
783740

784741
if let Ok(date) = doc.get_str("$date") {
@@ -1017,7 +974,7 @@ impl Timestamp {
1017974
///
1018975
/// Just a helper for convenience
1019976
///
1020-
/// ```rust,ignore
977+
/// ```rust
1021978
/// use serde::{Serialize, Deserialize};
1022979
/// use bson::DateTime;
1023980
///
@@ -1029,6 +986,37 @@ impl Timestamp {
1029986
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Copy, Clone)]
1030987
pub struct DateTime(pub chrono::DateTime<Utc>);
1031988

989+
impl DateTime {
990+
pub(crate) fn from_i64(date: i64) -> Self {
991+
let mut num_secs = date / 1000;
992+
let mut num_millis = date % 1000;
993+
994+
// The chrono API only lets us create a DateTime with an i64 number of seconds
995+
// and a u32 number of nanoseconds. In the case of a negative timestamp, this
996+
// means that we need to turn the negative fractional part into a positive and
997+
// shift the number of seconds down. For example:
998+
//
999+
// date = -4300 ms
1000+
// num_secs = date / 1000 = -4300 / 1000 = -4
1001+
// num_millis = date % 1000 = -4300 % 1000 = -300
1002+
//
1003+
// Since num_millis is less than 0:
1004+
// num_secs = num_secs -1 = -4 - 1 = -5
1005+
// num_millis = num_millis + 1000 = -300 + 1000 = 700
1006+
//
1007+
// Instead of -4 seconds and -300 milliseconds, we now have -5 seconds and +700
1008+
// milliseconds, which expresses the same timestamp, but in a way we can create
1009+
// a DateTime with.
1010+
if num_millis < 0 {
1011+
num_secs -= 1;
1012+
num_millis += 1000;
1013+
};
1014+
1015+
Utc.timestamp(num_secs, num_millis as u32 * 1_000_000)
1016+
.into()
1017+
}
1018+
}
1019+
10321020
impl Deref for DateTime {
10331021
type Target = chrono::DateTime<Utc>;
10341022

src/de/error.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
use std::{error, fmt, fmt::Display, io, string};
22

3+
use serde::de::{self, Unexpected};
4+
35
use crate::Bson;
4-
use de::Unexpected;
5-
use serde::de;
66

77
/// Possible errors that can arise during decoding.
88
#[derive(Debug)]

src/de/mod.rs

Lines changed: 111 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -44,11 +44,41 @@ use crate::{
4444
Decimal128,
4545
};
4646

47-
use ::serde::de::{self, Error as _};
47+
use ::serde::{
48+
de::{Error as _, Unexpected},
49+
Deserialize,
50+
};
4851

4952
const MAX_BSON_SIZE: i32 = 16 * 1024 * 1024;
53+
pub(crate) const MIN_BSON_DOCUMENT_SIZE: i32 = 4 + 1; // 4 bytes for length, one byte for null terminator
54+
const MIN_BSON_STRING_SIZE: i32 = 4 + 1; // 4 bytes for length, one byte for null terminator
55+
const MIN_CODE_WITH_SCOPE_SIZE: i32 = 4 + MIN_BSON_STRING_SIZE + MIN_BSON_DOCUMENT_SIZE;
56+
57+
/// Run the provided closure, ensuring that over the course of its execution, exactly `length` bytes
58+
/// were read from the reader.
59+
pub(crate) fn ensure_read_exactly<F, R>(
60+
reader: &mut R,
61+
length: usize,
62+
error_message: &str,
63+
func: F,
64+
) -> Result<()>
65+
where
66+
F: FnOnce(&mut std::io::Cursor<Vec<u8>>) -> Result<()>,
67+
R: Read + ?Sized,
68+
{
69+
let mut buf = vec![0u8; length];
70+
reader.read_exact(&mut buf)?;
71+
let mut cursor = std::io::Cursor::new(buf);
72+
73+
func(&mut cursor)?;
74+
75+
if cursor.position() != length as u64 {
76+
return Err(Error::invalid_length(length, &error_message));
77+
}
78+
Ok(())
79+
}
5080

51-
fn read_string<R: Read + ?Sized>(reader: &mut R, utf8_lossy: bool) -> crate::de::Result<String> {
81+
fn read_string<R: Read + ?Sized>(reader: &mut R, utf8_lossy: bool) -> Result<String> {
5282
let len = reader.read_i32::<LittleEndian>()?;
5383

5484
// UTF-8 String must have at least 1 byte (the last 0x00).
@@ -68,12 +98,19 @@ fn read_string<R: Read + ?Sized>(reader: &mut R, utf8_lossy: bool) -> crate::de:
6898
reader.take(len as u64 - 1).read_to_string(&mut s)?;
6999
s
70100
};
71-
reader.read_u8()?; // The last 0x00
101+
102+
// read the null terminator
103+
if reader.read_u8()? != 0 {
104+
return Err(Error::invalid_length(
105+
len as usize,
106+
&"contents of string longer than provided length",
107+
));
108+
}
72109

73110
Ok(s)
74111
}
75112

76-
fn read_cstring<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<String> {
113+
fn read_cstring<R: Read + ?Sized>(reader: &mut R) -> Result<String> {
77114
let mut v = Vec::new();
78115

79116
loop {
@@ -88,28 +125,28 @@ fn read_cstring<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<String> {
88125
}
89126

90127
#[inline]
91-
pub(crate) fn read_i32<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<i32> {
128+
pub(crate) fn read_i32<R: Read + ?Sized>(reader: &mut R) -> Result<i32> {
92129
reader.read_i32::<LittleEndian>().map_err(From::from)
93130
}
94131

95132
#[inline]
96-
fn read_i64<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<i64> {
133+
fn read_i64<R: Read + ?Sized>(reader: &mut R) -> Result<i64> {
97134
reader.read_i64::<LittleEndian>().map_err(From::from)
98135
}
99136

100137
/// Placeholder decoder for `Decimal128`. Reads 128 bits and just stores them, does no validation or
101138
/// parsing.
102139
#[cfg(not(feature = "decimal128"))]
103140
#[inline]
104-
fn read_f128<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<Decimal128> {
141+
fn read_f128<R: Read + ?Sized>(reader: &mut R) -> Result<Decimal128> {
105142
let mut buf = [0u8; 128 / 8];
106143
reader.read_exact(&mut buf)?;
107144
Ok(Decimal128 { bytes: buf })
108145
}
109146

110147
#[cfg(feature = "decimal128")]
111148
#[inline]
112-
fn read_f128<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<Decimal128> {
149+
fn read_f128<R: Read + ?Sized>(reader: &mut R) -> Result<Decimal128> {
113150
use std::mem;
114151

115152
let mut local_buf: [u8; 16] = unsafe { mem::MaybeUninit::uninit().assume_init() };
@@ -118,24 +155,27 @@ fn read_f128<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<Decimal128>
118155
Ok(val)
119156
}
120157

121-
fn deserialize_array<R: Read + ?Sized>(
122-
reader: &mut R,
123-
utf8_lossy: bool,
124-
) -> crate::de::Result<Array> {
158+
fn deserialize_array<R: Read + ?Sized>(reader: &mut R, utf8_lossy: bool) -> Result<Array> {
125159
let mut arr = Array::new();
126-
127-
// disregard the length: using Read::take causes infinite type recursion
128-
read_i32(reader)?;
129-
130-
loop {
131-
let tag = reader.read_u8()?;
132-
if tag == 0 {
133-
break;
134-
}
135-
136-
let (_, val) = deserialize_bson_kvp(reader, tag, utf8_lossy)?;
137-
arr.push(val)
138-
}
160+
let length = read_i32(reader)?;
161+
162+
ensure_read_exactly(
163+
reader,
164+
(length as usize) - 4,
165+
"array length longer than contents",
166+
|cursor| {
167+
loop {
168+
let tag = cursor.read_u8()?;
169+
if tag == 0 {
170+
break;
171+
}
172+
173+
let (_, val) = deserialize_bson_kvp(cursor, tag, utf8_lossy)?;
174+
arr.push(val)
175+
}
176+
Ok(())
177+
},
178+
)?;
139179

140180
Ok(arr)
141181
}
@@ -144,7 +184,7 @@ pub(crate) fn deserialize_bson_kvp<R: Read + ?Sized>(
144184
reader: &mut R,
145185
tag: u8,
146186
utf8_lossy: bool,
147-
) -> crate::de::Result<(String, Bson)> {
187+
) -> Result<(String, Bson)> {
148188
use spec::ElementType;
149189
let key = read_cstring(reader)?;
150190

@@ -165,7 +205,15 @@ pub(crate) fn deserialize_bson_kvp<R: Read + ?Sized>(
165205

166206
// Skip length data in old binary.
167207
if let BinarySubtype::BinaryOld = subtype {
168-
read_i32(reader)?;
208+
let data_len = read_i32(reader)?;
209+
210+
if data_len + 4 != len {
211+
return Err(Error::invalid_length(
212+
data_len as usize,
213+
&"0x02 length did not match top level binary length",
214+
));
215+
}
216+
169217
len -= 4;
170218
}
171219

@@ -181,7 +229,17 @@ pub(crate) fn deserialize_bson_kvp<R: Read + ?Sized>(
181229
}
182230
Bson::ObjectId(oid::ObjectId::with_bytes(objid))
183231
}
184-
Some(ElementType::Boolean) => Bson::Boolean(reader.read_u8()? != 0),
232+
Some(ElementType::Boolean) => {
233+
let val = reader.read_u8()?;
234+
if val > 1 {
235+
return Err(Error::invalid_value(
236+
Unexpected::Unsigned(val as u64),
237+
&"boolean must be stored as 0 or 1",
238+
));
239+
}
240+
241+
Bson::Boolean(val != 0)
242+
}
185243
Some(ElementType::Null) => Bson::Null,
186244
Some(ElementType::RegularExpression) => {
187245
let pattern = read_cstring(reader)?;
@@ -198,12 +256,29 @@ pub(crate) fn deserialize_bson_kvp<R: Read + ?Sized>(
198256
read_string(reader, utf8_lossy).map(Bson::JavaScriptCode)?
199257
}
200258
Some(ElementType::JavaScriptCodeWithScope) => {
201-
// disregard the length:
202-
// using Read::take causes infinite type recursion
203-
read_i32(reader)?;
259+
let length = read_i32(reader)?;
260+
if length < MIN_CODE_WITH_SCOPE_SIZE {
261+
return Err(Error::invalid_length(
262+
length as usize,
263+
&format!(
264+
"code with scope length must be at least {}",
265+
MIN_CODE_WITH_SCOPE_SIZE
266+
)
267+
.as_str(),
268+
));
269+
} else if length > MAX_BSON_SIZE {
270+
return Err(Error::invalid_length(
271+
length as usize,
272+
&"code with scope length too large",
273+
));
274+
}
275+
276+
let mut buf = vec![0u8; (length - 4) as usize];
277+
reader.read_exact(&mut buf)?;
204278

205-
let code = read_string(reader, utf8_lossy)?;
206-
let scope = Document::from_reader(reader)?;
279+
let mut slice = buf.as_slice();
280+
let code = read_string(&mut slice, utf8_lossy)?;
281+
let scope = Document::from_reader(&mut slice)?;
207282
Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope { code, scope })
208283
}
209284
Some(ElementType::Int32) => read_i32(reader).map(Bson::Int32)?,
@@ -256,10 +331,10 @@ pub(crate) fn deserialize_bson_kvp<R: Read + ?Sized>(
256331
}
257332

258333
/// Decode a `Bson` value into a type `T` that implements `Deserialize`.
259-
pub fn from_bson<'de, T>(bson: Bson) -> crate::de::Result<T>
334+
pub fn from_bson<'de, T>(bson: Bson) -> Result<T>
260335
where
261-
T: de::Deserialize<'de>,
336+
T: Deserialize<'de>,
262337
{
263338
let de = Deserializer::new(bson);
264-
de::Deserialize::deserialize(de)
339+
Deserialize::deserialize(de)
265340
}

0 commit comments

Comments
 (0)