Skip to content

Commit 1c6e65a

Browse files
authored
RUST-1992 Minor parsing cleanup (#485)
1 parent 39d90f6 commit 1c6e65a

File tree

8 files changed

+104
-159
lines changed

8 files changed

+104
-159
lines changed

src/de/error.rs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,12 @@ impl From<string::FromUtf8Error> for Error {
5858
}
5959
}
6060

61+
impl From<crate::raw::Error> for Error {
62+
fn from(value: crate::raw::Error) -> Self {
63+
Self::deserialization(value)
64+
}
65+
}
66+
6167
impl fmt::Display for Error {
6268
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
6369
match *self {

src/de/mod.rs

Lines changed: 5 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ use crate::{
3939
};
4040

4141
use ::serde::{
42-
de::{DeserializeOwned, Error as _, Unexpected},
42+
de::{DeserializeOwned, Error as _},
4343
Deserialize,
4444
};
4545

@@ -68,32 +68,6 @@ enum DeserializerHint {
6868
RawBson,
6969
}
7070

71-
pub(crate) fn read_bool<R: Read>(mut reader: R) -> Result<bool> {
72-
let val = read_u8(&mut reader)?;
73-
if val > 1 {
74-
return Err(Error::invalid_value(
75-
Unexpected::Unsigned(val as u64),
76-
&"boolean must be stored as 0 or 1",
77-
));
78-
}
79-
80-
Ok(val != 0)
81-
}
82-
83-
#[inline]
84-
pub(crate) fn read_u8<R: Read + ?Sized>(reader: &mut R) -> Result<u8> {
85-
let mut buf = [0; 1];
86-
reader.read_exact(&mut buf)?;
87-
Ok(u8::from_le_bytes(buf))
88-
}
89-
90-
#[inline]
91-
pub(crate) fn read_i32<R: Read + ?Sized>(reader: &mut R) -> Result<i32> {
92-
let mut buf = [0; 4];
93-
reader.read_exact(&mut buf)?;
94-
Ok(i32::from_le_bytes(buf))
95-
}
96-
9771
impl Timestamp {
9872
pub(crate) fn from_reader<R: Read>(mut reader: R) -> Result<Self> {
9973
let mut bytes = [0; 8];
@@ -181,8 +155,10 @@ where
181155
Deserialize::deserialize(de)
182156
}
183157

184-
fn reader_to_vec<R: Read>(mut reader: R) -> Result<Vec<u8>> {
185-
let length = read_i32(&mut reader)?;
158+
pub(crate) fn reader_to_vec<R: Read>(mut reader: R) -> Result<Vec<u8>> {
159+
let mut buf = [0; 4];
160+
reader.read_exact(&mut buf)?;
161+
let length = i32::from_le_bytes(buf);
186162

187163
if length < MIN_BSON_DOCUMENT_SIZE {
188164
return Err(Error::custom("document size too small"));

src/de/raw.rs

Lines changed: 5 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ struct DeserializerOptions {
5151
impl<'de> Deserializer<'de> {
5252
pub(crate) fn new(buf: &'de [u8], utf8_lossy: bool) -> Result<Self> {
5353
Ok(Self {
54-
element: RawElement::toplevel(buf).map_err(Error::deserialization)?,
54+
element: RawElement::toplevel(buf)?,
5555
options: DeserializerOptions {
5656
utf8_lossy,
5757
human_readable: false,
@@ -60,7 +60,7 @@ impl<'de> Deserializer<'de> {
6060
}
6161

6262
fn value(&self) -> Result<RawBsonRef<'de>> {
63-
self.element.value().map_err(Error::deserialization)
63+
Ok(self.element.value()?)
6464
}
6565

6666
/// Deserialize the element, using the type of the element along with the
@@ -70,11 +70,7 @@ impl<'de> Deserializer<'de> {
7070
V: serde::de::Visitor<'de>,
7171
{
7272
if self.options.utf8_lossy {
73-
if let Some(lossy) = self
74-
.element
75-
.value_utf8_lossy()
76-
.map_err(Error::deserialization)?
77-
{
73+
if let Some(lossy) = self.element.value_utf8_lossy()? {
7874
return match lossy {
7975
Utf8LossyBson::String(s) => visitor.visit_string(s),
8076
Utf8LossyBson::RegularExpression(re) => {
@@ -183,10 +179,7 @@ impl<'de> Deserializer<'de> {
183179

184180
fn get_string(&self) -> Result<Cow<'de, str>> {
185181
if self.options.utf8_lossy {
186-
let value = self
187-
.element
188-
.value_utf8_lossy()
189-
.map_err(Error::deserialization)?;
182+
let value = self.element.value_utf8_lossy()?;
190183
let s = match value {
191184
Some(Utf8LossyBson::String(s)) => s,
192185
_ => {
@@ -335,11 +328,7 @@ impl<'de> DocumentAccess<'de> {
335328
}
336329

337330
fn advance(&mut self) -> Result<()> {
338-
self.elem = self
339-
.iter
340-
.next()
341-
.transpose()
342-
.map_err(Error::deserialization)?;
331+
self.elem = self.iter.next().transpose()?;
343332
Ok(())
344333
}
345334

src/document.rs

Lines changed: 2 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
//! A BSON document represented as an associative HashMap with insertion ordering.
22
33
use std::{
4-
convert::TryInto,
54
error,
65
fmt::{self, Debug, Display, Formatter},
76
io::{Read, Write},
@@ -10,11 +9,10 @@ use std::{
109

1110
use ahash::RandomState;
1211
use indexmap::IndexMap;
13-
use serde::{de::Error, Deserialize};
12+
use serde::Deserialize;
1413

1514
use crate::{
1615
bson::{Array, Bson, Timestamp},
17-
de::{read_i32, MIN_BSON_DOCUMENT_SIZE},
1816
oid::ObjectId,
1917
spec::BinarySubtype,
2018
Binary,
@@ -548,22 +546,7 @@ impl Document {
548546
}
549547

550548
fn decode<R: Read + ?Sized>(reader: &mut R, utf_lossy: bool) -> crate::de::Result<Document> {
551-
let length = read_i32(reader)?;
552-
if length < MIN_BSON_DOCUMENT_SIZE {
553-
return Err(crate::de::Error::invalid_length(
554-
length as usize,
555-
&"document length must be at least 5",
556-
));
557-
}
558-
let ulen: usize =
559-
length
560-
.try_into()
561-
.map_err(|e| crate::de::Error::DeserializationError {
562-
message: format!("invalid document length: {}", e),
563-
})?;
564-
let mut buf = vec![0u8; ulen];
565-
buf[0..4].copy_from_slice(&length.to_le_bytes());
566-
reader.read_exact(&mut buf[4..])?;
549+
let buf = crate::de::reader_to_vec(reader)?;
567550
let deserializer = crate::de::RawDeserializer::new(&buf, utf_lossy)?;
568551
Document::deserialize(deserializer)
569552
}

src/raw/document.rs

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -519,15 +519,11 @@ impl RawDocument {
519519
let buf = &self.as_bytes()[start_at..];
520520

521521
let mut splits = buf.splitn(2, |x| *x == 0);
522-
let value = splits
523-
.next()
524-
.ok_or_else(|| Error::new_without_key(ErrorKind::new_malformed("no value")))?;
522+
let value = splits.next().ok_or_else(|| Error::malformed("no value"))?;
525523
if splits.next().is_some() {
526524
Ok(value)
527525
} else {
528-
Err(Error::new_without_key(ErrorKind::new_malformed(
529-
"expected null terminator",
530-
)))
526+
Err(Error::malformed("expected null terminator"))
531527
}
532528
}
533529

src/raw/error.rs

Lines changed: 9 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -14,19 +14,18 @@ pub struct Error {
1414
}
1515

1616
impl Error {
17-
pub(crate) fn new_with_key(key: impl Into<String>, kind: ErrorKind) -> Self {
18-
Self {
19-
kind,
20-
key: Some(key.into()),
21-
}
17+
pub(crate) fn new(kind: ErrorKind) -> Self {
18+
Self { key: None, kind }
2219
}
2320

24-
pub(crate) fn new_without_key(kind: ErrorKind) -> Self {
25-
Self { key: None, kind }
21+
pub(crate) fn malformed(e: impl ToString) -> Self {
22+
Self::new(ErrorKind::MalformedValue {
23+
message: e.to_string(),
24+
})
2625
}
2726

28-
pub(crate) fn with_key(mut self, key: impl AsRef<str>) -> Self {
29-
self.key = Some(key.as_ref().to_string());
27+
pub(crate) fn with_key(mut self, key: impl Into<String>) -> Self {
28+
self.key = Some(key.into());
3029
self
3130
}
3231

@@ -48,14 +47,6 @@ pub enum ErrorKind {
4847
Utf8EncodingError(Utf8Error),
4948
}
5049

51-
impl ErrorKind {
52-
pub(crate) fn new_malformed(e: impl ToString) -> Self {
53-
ErrorKind::MalformedValue {
54-
message: e.to_string(),
55-
}
56-
}
57-
}
58-
5950
impl std::fmt::Display for Error {
6051
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
6152
let p = self
@@ -80,7 +71,7 @@ pub type Result<T> = std::result::Result<T, Error>;
8071

8172
/// Execute the provided closure, mapping the key of the returned error (if any) to the provided
8273
/// key.
83-
pub(crate) fn try_with_key<G, F: FnOnce() -> Result<G>>(key: impl AsRef<str>, f: F) -> Result<G> {
74+
pub(crate) fn try_with_key<G, F: FnOnce() -> Result<G>>(key: impl Into<String>, f: F) -> Result<G> {
8475
f().map_err(|e| e.with_key(key))
8576
}
8677

src/raw/iter.rs

Lines changed: 20 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
use std::convert::TryInto;
22

33
use crate::{
4-
de::{read_bool, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE},
4+
de::{MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE},
55
oid::ObjectId,
6-
raw::{Error, ErrorKind, Result},
6+
raw::{Error, Result},
77
spec::{BinarySubtype, ElementType},
88
Bson,
99
DateTime,
@@ -18,6 +18,7 @@ use crate::{
1818
};
1919

2020
use super::{
21+
bool_from_slice,
2122
checked_add,
2223
error::try_with_key,
2324
f64_from_slice,
@@ -81,11 +82,11 @@ impl<'a> RawIter<'a> {
8182
fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> {
8283
let end = checked_add(start, num_bytes)?;
8384
if self.doc.as_bytes().get(start..end).is_none() {
84-
return Err(Error::new_without_key(ErrorKind::new_malformed(format!(
85+
return Err(Error::malformed(format!(
8586
"length exceeds remaining length of buffer: {} vs {}",
8687
num_bytes,
8788
self.doc.as_bytes().len() - start
88-
))));
89+
)));
8990
}
9091
Ok(())
9192
}
@@ -95,18 +96,16 @@ impl<'a> RawIter<'a> {
9596
let size = i32_from_slice(&self.doc.as_bytes()[starting_at..])? as usize;
9697

9798
if size < MIN_BSON_DOCUMENT_SIZE as usize {
98-
return Err(Error::new_without_key(ErrorKind::new_malformed(format!(
99+
return Err(Error::malformed(format!(
99100
"document too small: {} bytes",
100101
size
101-
))));
102+
)));
102103
}
103104

104105
self.verify_enough_bytes(starting_at, size)?;
105106

106107
if self.doc.as_bytes()[starting_at + size - 1] != 0 {
107-
return Err(Error::new_without_key(ErrorKind::new_malformed(
108-
"not null terminated",
109-
)));
108+
return Err(Error::malformed("not null terminated"));
110109
}
111110
Ok(size)
112111
}
@@ -186,9 +185,9 @@ impl<'a> RawElement<'a> {
186185
ElementType::Array => {
187186
RawBsonRef::Array(RawArray::from_doc(RawDocument::from_bytes(self.slice())?))
188187
}
189-
ElementType::Boolean => {
190-
RawBsonRef::Boolean(read_bool(self.slice()).map_err(|e| self.malformed_error(e))?)
191-
}
188+
ElementType::Boolean => RawBsonRef::Boolean(
189+
bool_from_slice(self.slice()).map_err(|e| self.malformed_error(e))?,
190+
),
192191
ElementType::DateTime => {
193192
RawBsonRef::DateTime(DateTime::from_millis(i64_from_slice(self.slice())?))
194193
}
@@ -309,7 +308,7 @@ impl<'a> RawElement<'a> {
309308
}
310309

311310
fn malformed_error(&self, e: impl ToString) -> Error {
312-
Error::new_with_key(self.key, ErrorKind::new_malformed(e))
311+
Error::malformed(e).with_key(self.key)
313312
}
314313

315314
pub(crate) fn slice(&self) -> &'a [u8] {
@@ -336,7 +335,7 @@ impl<'a> RawElement<'a> {
336335
Ok(ObjectId::from_bytes(
337336
self.doc.as_bytes()[start_at..(start_at + 12)]
338337
.try_into()
339-
.map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?,
338+
.map_err(|e| Error::malformed(e).with_key(self.key))?,
340339
))
341340
}
342341
}
@@ -345,9 +344,7 @@ impl<'a> RawIter<'a> {
345344
fn get_next_length_at(&self, start_at: usize) -> Result<usize> {
346345
let len = i32_from_slice(&self.doc.as_bytes()[start_at..])?;
347346
if len < 0 {
348-
Err(Error::new_without_key(ErrorKind::new_malformed(
349-
"lengths can't be negative",
350-
)))
347+
Err(Error::malformed("lengths can't be negative"))
351348
} else {
352349
Ok(len as usize)
353350
}
@@ -366,15 +363,11 @@ impl<'a> Iterator for RawIter<'a> {
366363
return None;
367364
} else {
368365
self.valid = false;
369-
return Some(Err(Error::new_without_key(ErrorKind::new_malformed(
370-
"document not null terminated",
371-
))));
366+
return Some(Err(Error::malformed("document not null terminated")));
372367
}
373368
} else if self.offset >= self.doc.as_bytes().len() {
374369
self.valid = false;
375-
return Some(Err(Error::new_without_key(ErrorKind::new_malformed(
376-
"iteration overflowed document",
377-
))));
370+
return Some(Err(Error::malformed("iteration overflowed document")));
378371
}
379372

380373
let key = match self.doc.read_cstring_at(self.offset + 1) {
@@ -390,13 +383,11 @@ impl<'a> Iterator for RawIter<'a> {
390383
let element_type = match ElementType::from(self.doc.as_bytes()[self.offset]) {
391384
Some(et) => et,
392385
None => {
393-
return Err(Error::new_with_key(
394-
key,
395-
ErrorKind::new_malformed(format!(
396-
"invalid tag: {}",
397-
self.doc.as_bytes()[self.offset]
398-
)),
386+
return Err(Error::malformed(format!(
387+
"invalid tag: {}",
388+
self.doc.as_bytes()[self.offset]
399389
))
390+
.with_key(key))
400391
}
401392
};
402393

0 commit comments

Comments
 (0)