Skip to content

Commit a5c1e20

Browse files
authored
RUST-284 Incorporate raw BSON code from rawbson = "0.2.1" in mod raw (#229)
Thanks to @jcdyer for contributing the bulk of this code!
1 parent ce50e17 commit a5c1e20

File tree

17 files changed

+2678
-25
lines changed

17 files changed

+2678
-25
lines changed

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,10 @@ serde_bytes = "0.11.5"
5555

5656
[dev-dependencies]
5757
assert_matches = "1.2"
58-
serde_bytes = "0.11"
58+
criterion = "0.3.0"
5959
pretty_assertions = "0.6.1"
60+
proptest = "1.0.0"
61+
serde_bytes = "0.11"
6062
chrono = { version = "0.4", features = ["serde"] }
6163

6264
[package.metadata.docs.rs]

src/bson.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -730,13 +730,10 @@ impl Bson {
730730
if let Ok(regex) = doc.get_document("$regularExpression") {
731731
if let Ok(pattern) = regex.get_str("pattern") {
732732
if let Ok(options) = regex.get_str("options") {
733-
let mut options: Vec<_> = options.chars().collect();
734-
options.sort_unstable();
735-
736-
return Bson::RegularExpression(Regex {
737-
pattern: pattern.into(),
738-
options: options.into_iter().collect(),
739-
});
733+
return Bson::RegularExpression(Regex::new(
734+
pattern.into(),
735+
options.into(),
736+
));
740737
}
741738
}
742739
}
@@ -1014,6 +1011,15 @@ pub struct Regex {
10141011
pub options: String,
10151012
}
10161013

1014+
impl Regex {
1015+
pub(crate) fn new(pattern: String, options: String) -> Self {
1016+
let mut chars: Vec<_> = options.chars().collect();
1017+
chars.sort_unstable();
1018+
let options: String = chars.into_iter().collect();
1019+
Self { pattern, options }
1020+
}
1021+
}
1022+
10171023
impl Display for Regex {
10181024
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
10191025
write!(fmt, "/{}/{}", self.pattern, self.options)

src/de/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ pub(crate) fn read_string<R: Read + ?Sized>(reader: &mut R, utf8_lossy: bool) ->
108108
Ok(s)
109109
}
110110

111-
fn read_bool<R: Read>(mut reader: R) -> Result<bool> {
111+
pub(crate) fn read_bool<R: Read>(mut reader: R) -> Result<bool> {
112112
let val = read_u8(&mut reader)?;
113113
if val > 1 {
114114
return Err(Error::invalid_value(

src/de/raw.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ impl<'de> serde::de::MapAccess<'de> for Decimal128Access {
625625
where
626626
V: serde::de::DeserializeSeed<'de>,
627627
{
628-
seed.deserialize(Decimal128Deserializer(self.decimal.clone()))
628+
seed.deserialize(Decimal128Deserializer(self.decimal))
629629
}
630630
}
631631

src/decimal128.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::fmt;
66
///
77
/// Currently, this type can only be used to round-trip through BSON. See
88
/// [RUST-36](https://jira.mongodb.org/browse/RUST-36) to track the progress towards a complete implementation.
9-
#[derive(Clone, PartialEq)]
9+
#[derive(Copy, Clone, PartialEq)]
1010
pub struct Decimal128 {
1111
/// BSON bytes containing the decimal128. Stored for round tripping.
1212
pub(crate) bytes: [u8; 128 / 8],

src/extjson/models.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,7 @@ pub(crate) struct RegexBody {
116116

117117
impl Regex {
118118
pub(crate) fn parse(self) -> crate::Regex {
119-
let mut chars: Vec<_> = self.body.options.chars().collect();
120-
chars.sort_unstable();
121-
let options: String = chars.into_iter().collect();
122-
123-
crate::Regex {
124-
pattern: self.body.pattern,
125-
options,
126-
}
119+
crate::Regex::new(self.body.pattern, self.body.options)
127120
}
128121
}
129122

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ pub use self::{
280280
Deserializer,
281281
},
282282
decimal128::Decimal128,
283+
raw::{RawDocument, RawDocumentBuf, RawArray},
283284
ser::{to_bson, to_document, to_vec, Serializer},
284285
uuid::{Uuid, UuidRepresentation},
285286
};
@@ -293,6 +294,7 @@ pub mod decimal128;
293294
pub mod document;
294295
pub mod extjson;
295296
pub mod oid;
297+
pub mod raw;
296298
pub mod ser;
297299
pub mod serde_helpers;
298300
pub mod spec;

src/raw/array.rs

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
use std::convert::TryFrom;
2+
3+
use super::{
4+
error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult},
5+
Error,
6+
Iter,
7+
RawBinary,
8+
RawBson,
9+
RawDocument,
10+
RawRegex,
11+
Result,
12+
};
13+
use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp};
14+
15+
/// A slice of a BSON document containing a BSON array value (akin to [`std::str`]). This can be
16+
/// retrieved from a [`RawDocument`] via [`RawDocument::get`].
17+
///
18+
/// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`.
19+
///
20+
/// Accessing elements within a [`RawArray`] is similar to element access in [`crate::Document`],
21+
/// but because the contents are parsed during iteration instead of at creation time, format errors
22+
/// can happen at any time during use.
23+
///
24+
/// Iterating over a [`RawArray`] yields either an error or a value that borrows from the
25+
/// original document without making any additional allocations.
26+
///
27+
/// ```
28+
/// use bson::{doc, raw::RawDocument};
29+
///
30+
/// let doc = doc! {
31+
/// "x": [1, true, "two", 5.5]
32+
/// };
33+
/// let bytes = bson::to_vec(&doc)?;
34+
///
35+
/// let rawdoc = RawDocument::new(bytes.as_slice())?;
36+
/// let rawarray = rawdoc.get_array("x")?;
37+
///
38+
/// for v in rawarray {
39+
/// println!("{:?}", v?);
40+
/// }
41+
/// # Ok::<(), Box<dyn std::error::Error>>(())
42+
/// ```
43+
///
44+
/// Individual elements can be accessed using [`RawArray::get`] or any of
45+
/// the type-specific getters, such as [`RawArray::get_object_id`] or
46+
/// [`RawArray::get_str`]. Note that accessing elements is an O(N) operation, as it
47+
/// requires iterating through the array from the beginning to find the requested index.
48+
///
49+
/// ```
50+
/// # use bson::raw::{ValueAccessError};
51+
/// use bson::{doc, raw::RawDocument};
52+
///
53+
/// let doc = doc! {
54+
/// "x": [1, true, "two", 5.5]
55+
/// };
56+
/// let bytes = bson::to_vec(&doc)?;
57+
///
58+
/// let rawdoc = RawDocument::new(bytes.as_slice())?;
59+
/// let rawarray = rawdoc.get_array("x")?;
60+
///
61+
/// assert_eq!(rawarray.get_bool(1)?, true);
62+
/// # Ok::<(), Box<dyn std::error::Error>>(())
63+
/// ```
64+
#[derive(PartialEq)]
65+
#[repr(transparent)]
66+
pub struct RawArray {
67+
pub(crate) doc: RawDocument,
68+
}
69+
70+
impl RawArray {
71+
pub(crate) fn from_doc(doc: &RawDocument) -> &RawArray {
72+
// SAFETY:
73+
//
74+
// Dereferencing a raw pointer requires unsafe due to the potential that the pointer is
75+
// null, dangling, or misaligned. We know the pointer is not null or dangling due to the
76+
// fact that it's created by a safe reference. Converting &RawDocument to *const
77+
// RawDocument will be properly aligned due to them being references to the same type,
78+
// and converting *const RawDocument to *const RawArray is aligned due to the fact that
79+
// the only field in a RawArray is a RawDocument, meaning the structs are represented
80+
// identically at the byte level.
81+
unsafe { &*(doc as *const RawDocument as *const RawArray) }
82+
}
83+
84+
/// Gets a reference to the value at the given index.
85+
pub fn get(&self, index: usize) -> Result<Option<RawBson<'_>>> {
86+
self.into_iter().nth(index).transpose()
87+
}
88+
89+
fn get_with<'a, T>(
90+
&'a self,
91+
index: usize,
92+
expected_type: ElementType,
93+
f: impl FnOnce(RawBson<'a>) -> Option<T>,
94+
) -> ValueAccessResult<T> {
95+
let bson = self
96+
.get(index)
97+
.map_err(|e| ValueAccessError {
98+
key: index.to_string(),
99+
kind: ValueAccessErrorKind::InvalidBson(e),
100+
})?
101+
.ok_or(ValueAccessError {
102+
key: index.to_string(),
103+
kind: ValueAccessErrorKind::NotPresent,
104+
})?;
105+
match f(bson) {
106+
Some(t) => Ok(t),
107+
None => Err(ValueAccessError {
108+
key: index.to_string(),
109+
kind: ValueAccessErrorKind::UnexpectedType {
110+
expected: expected_type,
111+
actual: bson.element_type(),
112+
},
113+
}),
114+
}
115+
}
116+
117+
/// Gets the BSON double at the given index or returns an error if the value at that index isn't
118+
/// a double.
119+
pub fn get_f64(&self, index: usize) -> ValueAccessResult<f64> {
120+
self.get_with(index, ElementType::Double, RawBson::as_f64)
121+
}
122+
123+
/// Gets a reference to the string at the given index or returns an error if the
124+
/// value at that index isn't a string.
125+
pub fn get_str(&self, index: usize) -> ValueAccessResult<&str> {
126+
self.get_with(index, ElementType::String, RawBson::as_str)
127+
}
128+
129+
/// Gets a reference to the document at the given index or returns an error if the
130+
/// value at that index isn't a document.
131+
pub fn get_document(&self, index: usize) -> ValueAccessResult<&RawDocument> {
132+
self.get_with(index, ElementType::EmbeddedDocument, RawBson::as_document)
133+
}
134+
135+
/// Gets a reference to the array at the given index or returns an error if the
136+
/// value at that index isn't a array.
137+
pub fn get_array(&self, index: usize) -> ValueAccessResult<&RawArray> {
138+
self.get_with(index, ElementType::Array, RawBson::as_array)
139+
}
140+
141+
/// Gets a reference to the BSON binary value at the given index or returns an error if the
142+
/// value at that index isn't a binary.
143+
pub fn get_binary(&self, index: usize) -> ValueAccessResult<RawBinary<'_>> {
144+
self.get_with(index, ElementType::Binary, RawBson::as_binary)
145+
}
146+
147+
/// Gets the ObjectId at the given index or returns an error if the value at that index isn't an
148+
/// ObjectId.
149+
pub fn get_object_id(&self, index: usize) -> ValueAccessResult<ObjectId> {
150+
self.get_with(index, ElementType::ObjectId, RawBson::as_object_id)
151+
}
152+
153+
/// Gets the boolean at the given index or returns an error if the value at that index isn't a
154+
/// boolean.
155+
pub fn get_bool(&self, index: usize) -> ValueAccessResult<bool> {
156+
self.get_with(index, ElementType::Boolean, RawBson::as_bool)
157+
}
158+
159+
/// Gets the DateTime at the given index or returns an error if the value at that index isn't a
160+
/// DateTime.
161+
pub fn get_datetime(&self, index: usize) -> ValueAccessResult<DateTime> {
162+
self.get_with(index, ElementType::DateTime, RawBson::as_datetime)
163+
}
164+
165+
/// Gets a reference to the BSON regex at the given index or returns an error if the
166+
/// value at that index isn't a regex.
167+
pub fn get_regex(&self, index: usize) -> ValueAccessResult<RawRegex<'_>> {
168+
self.get_with(index, ElementType::RegularExpression, RawBson::as_regex)
169+
}
170+
171+
/// Gets a reference to the BSON timestamp at the given index or returns an error if the
172+
/// value at that index isn't a timestamp.
173+
pub fn get_timestamp(&self, index: usize) -> ValueAccessResult<Timestamp> {
174+
self.get_with(index, ElementType::Timestamp, RawBson::as_timestamp)
175+
}
176+
177+
/// Gets the BSON int32 at the given index or returns an error if the value at that index isn't
178+
/// a 32-bit integer.
179+
pub fn get_i32(&self, index: usize) -> ValueAccessResult<i32> {
180+
self.get_with(index, ElementType::Int32, RawBson::as_i32)
181+
}
182+
183+
/// Gets BSON int64 at the given index or returns an error if the value at that index isn't a
184+
/// 64-bit integer.
185+
pub fn get_i64(&self, index: usize) -> ValueAccessResult<i64> {
186+
self.get_with(index, ElementType::Int64, RawBson::as_i64)
187+
}
188+
189+
/// Gets a reference to the raw bytes of the [`RawArray`].
190+
pub fn as_bytes(&self) -> &[u8] {
191+
self.doc.as_bytes()
192+
}
193+
}
194+
195+
impl std::fmt::Debug for RawArray {
196+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
197+
f.debug_struct("RawArray")
198+
.field("data", &hex::encode(self.doc.as_bytes()))
199+
.finish()
200+
}
201+
}
202+
203+
impl TryFrom<&RawArray> for Vec<Bson> {
204+
type Error = Error;
205+
206+
fn try_from(arr: &RawArray) -> Result<Vec<Bson>> {
207+
arr.into_iter()
208+
.map(|result| {
209+
let rawbson = result?;
210+
Bson::try_from(rawbson)
211+
})
212+
.collect()
213+
}
214+
}
215+
216+
impl<'a> IntoIterator for &'a RawArray {
217+
type IntoIter = RawArrayIter<'a>;
218+
type Item = Result<RawBson<'a>>;
219+
220+
fn into_iter(self) -> RawArrayIter<'a> {
221+
RawArrayIter {
222+
inner: self.doc.into_iter(),
223+
}
224+
}
225+
}
226+
227+
/// An iterator over borrowed raw BSON array values.
228+
pub struct RawArrayIter<'a> {
229+
inner: Iter<'a>,
230+
}
231+
232+
impl<'a> Iterator for RawArrayIter<'a> {
233+
type Item = Result<RawBson<'a>>;
234+
235+
fn next(&mut self) -> Option<Result<RawBson<'a>>> {
236+
match self.inner.next() {
237+
Some(Ok((_, v))) => Some(Ok(v)),
238+
Some(Err(e)) => Some(Err(e)),
239+
None => None,
240+
}
241+
}
242+
}

0 commit comments

Comments
 (0)