Skip to content

Commit 2e8fb00

Browse files
authored
RUST-1992 Factor raw bson encoding out of RawDocumentBuf (#486)
1 parent 1c6e65a commit 2e8fb00

File tree

6 files changed, with 118 additions and 127 deletions.

src/raw/document_buf.rs

Lines changed: 6 additions & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ use std::{
77

88
use serde::{Deserialize, Serialize};
99

10-
use crate::{de::MIN_BSON_DOCUMENT_SIZE, spec::BinarySubtype, Document};
10+
use crate::{de::MIN_BSON_DOCUMENT_SIZE, Document};
1111

1212
use super::{
1313
bson::RawBson,
@@ -21,6 +21,8 @@ use super::{
2121
Result,
2222
};
2323

24+
mod raw_writer;
25+
2426
/// An owned BSON document (akin to [`std::path::PathBuf`]), backed by a buffer of raw BSON bytes.
2527
/// This can be created from a `Vec<u8>` or a [`crate::Document`].
2628
///
@@ -221,103 +223,9 @@ impl RawDocumentBuf {
221223
///
222224
/// If the provided key contains an interior null byte, this method will panic.
223225
pub fn append_ref<'a>(&mut self, key: impl AsRef<str>, value: impl Into<RawBsonRef<'a>>) {
224-
fn append_string(doc: &mut RawDocumentBuf, value: &str) {
225-
doc.data
226-
.extend(((value.as_bytes().len() + 1) as i32).to_le_bytes());
227-
doc.data.extend(value.as_bytes());
228-
doc.data.push(0);
229-
}
230-
231-
fn append_cstring(doc: &mut RawDocumentBuf, value: &str) {
232-
if value.contains('\0') {
233-
panic!("cstr includes interior null byte: {}", value)
234-
}
235-
doc.data.extend(value.as_bytes());
236-
doc.data.push(0);
237-
}
238-
239-
let original_len = self.data.len();
240-
241-
// write the key for the next value to the end
242-
// the element type will replace the previous null byte terminator of the document
243-
append_cstring(self, key.as_ref());
244-
245-
let value = value.into();
246-
let element_type = value.element_type();
247-
248-
match value {
249-
RawBsonRef::Int32(i) => {
250-
self.data.extend(i.to_le_bytes());
251-
}
252-
RawBsonRef::String(s) => {
253-
append_string(self, s);
254-
}
255-
RawBsonRef::Document(d) => {
256-
self.data.extend(d.as_bytes());
257-
}
258-
RawBsonRef::Array(a) => {
259-
self.data.extend(a.as_bytes());
260-
}
261-
RawBsonRef::Binary(b) => {
262-
let len = b.len();
263-
self.data.extend(len.to_le_bytes());
264-
self.data.push(b.subtype.into());
265-
if let BinarySubtype::BinaryOld = b.subtype {
266-
self.data.extend((len - 4).to_le_bytes())
267-
}
268-
self.data.extend(b.bytes);
269-
}
270-
RawBsonRef::Boolean(b) => {
271-
self.data.push(b as u8);
272-
}
273-
RawBsonRef::DateTime(dt) => {
274-
self.data.extend(dt.timestamp_millis().to_le_bytes());
275-
}
276-
RawBsonRef::DbPointer(dbp) => {
277-
append_string(self, dbp.namespace);
278-
self.data.extend(dbp.id.bytes());
279-
}
280-
RawBsonRef::Decimal128(d) => {
281-
self.data.extend(d.bytes());
282-
}
283-
RawBsonRef::Double(d) => {
284-
self.data.extend(d.to_le_bytes());
285-
}
286-
RawBsonRef::Int64(i) => {
287-
self.data.extend(i.to_le_bytes());
288-
}
289-
RawBsonRef::RegularExpression(re) => {
290-
append_cstring(self, re.pattern);
291-
append_cstring(self, re.options);
292-
}
293-
RawBsonRef::JavaScriptCode(js) => {
294-
append_string(self, js);
295-
}
296-
RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => {
297-
let len = code_w_scope.len();
298-
self.data.extend(len.to_le_bytes());
299-
append_string(self, code_w_scope.code);
300-
self.data.extend(code_w_scope.scope.as_bytes());
301-
}
302-
RawBsonRef::Timestamp(ts) => {
303-
self.data.extend(ts.to_le_bytes());
304-
}
305-
RawBsonRef::ObjectId(oid) => {
306-
self.data.extend(oid.bytes());
307-
}
308-
RawBsonRef::Symbol(s) => {
309-
append_string(self, s);
310-
}
311-
RawBsonRef::Null | RawBsonRef::Undefined | RawBsonRef::MinKey | RawBsonRef::MaxKey => {}
312-
}
313-
314-
// update element type
315-
self.data[original_len - 1] = element_type as u8;
316-
// append trailing null byte
317-
self.data.push(0);
318-
// update length
319-
let new_len = (self.data.len() as i32).to_le_bytes();
320-
self.data[0..4].copy_from_slice(&new_len);
226+
raw_writer::RawWriter::new(&mut self.data)
227+
.append(key.as_ref(), value.into())
228+
.expect("key should not contain interior null byte")
321229
}
322230

323231
/// Convert this [`RawDocumentBuf`] to a [`Document`], returning an error

src/raw/document_buf/raw_writer.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
use crate::{
2+
ser::{write_cstring, write_string},
3+
spec::BinarySubtype,
4+
RawBsonRef,
5+
};
6+
7+
pub(super) struct RawWriter<'a> {
8+
data: &'a mut Vec<u8>,
9+
}
10+
11+
impl<'a> RawWriter<'a> {
12+
pub(super) fn new(data: &'a mut Vec<u8>) -> Self {
13+
Self { data }
14+
}
15+
16+
pub(super) fn append(&mut self, key: &str, value: RawBsonRef) -> crate::ser::Result<()> {
17+
let original_len = self.data.len();
18+
self.data[original_len - 1] = value.element_type() as u8;
19+
20+
write_cstring(self.data, key)?;
21+
22+
match value {
23+
RawBsonRef::Int32(i) => {
24+
self.data.extend(i.to_le_bytes());
25+
}
26+
RawBsonRef::String(s) => {
27+
write_string(self.data, s);
28+
}
29+
RawBsonRef::Document(d) => {
30+
self.data.extend(d.as_bytes());
31+
}
32+
RawBsonRef::Array(a) => {
33+
self.data.extend(a.as_bytes());
34+
}
35+
RawBsonRef::Binary(b) => {
36+
let len = b.len();
37+
self.data.extend(len.to_le_bytes());
38+
self.data.push(b.subtype.into());
39+
if let BinarySubtype::BinaryOld = b.subtype {
40+
self.data.extend((len - 4).to_le_bytes())
41+
}
42+
self.data.extend(b.bytes);
43+
}
44+
RawBsonRef::Boolean(b) => {
45+
self.data.push(b as u8);
46+
}
47+
RawBsonRef::DateTime(dt) => {
48+
self.data.extend(dt.timestamp_millis().to_le_bytes());
49+
}
50+
RawBsonRef::DbPointer(dbp) => {
51+
write_string(self.data, dbp.namespace);
52+
self.data.extend(dbp.id.bytes());
53+
}
54+
RawBsonRef::Decimal128(d) => {
55+
self.data.extend(d.bytes());
56+
}
57+
RawBsonRef::Double(d) => {
58+
self.data.extend(d.to_le_bytes());
59+
}
60+
RawBsonRef::Int64(i) => {
61+
self.data.extend(i.to_le_bytes());
62+
}
63+
RawBsonRef::RegularExpression(re) => {
64+
write_cstring(self.data, re.pattern)?;
65+
write_cstring(self.data, re.options)?;
66+
}
67+
RawBsonRef::JavaScriptCode(js) => {
68+
write_string(self.data, js);
69+
}
70+
RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => {
71+
let len = code_w_scope.len();
72+
self.data.extend(len.to_le_bytes());
73+
write_string(self.data, code_w_scope.code);
74+
self.data.extend(code_w_scope.scope.as_bytes());
75+
}
76+
RawBsonRef::Timestamp(ts) => {
77+
self.data.extend(ts.to_le_bytes());
78+
}
79+
RawBsonRef::ObjectId(oid) => {
80+
self.data.extend(oid.bytes());
81+
}
82+
RawBsonRef::Symbol(s) => {
83+
write_string(self.data, s);
84+
}
85+
RawBsonRef::Null | RawBsonRef::Undefined | RawBsonRef::MinKey | RawBsonRef::MaxKey => {}
86+
}
87+
88+
// append trailing null byte
89+
self.data.push(0);
90+
// update length
91+
let new_len = (self.data.len() as i32).to_le_bytes();
92+
self.data[0..4].copy_from_slice(&new_len);
93+
94+
Ok(())
95+
}
96+
}

src/raw/serde/seeded_visitor.rs

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use serde::{
77

88
use crate::{
99
raw::RAW_BSON_NEWTYPE,
10+
ser::{write_cstring, write_string},
1011
spec::{BinarySubtype, ElementType},
1112
RawBson,
1213
RawBsonRef,
@@ -119,26 +120,12 @@ impl<'a, 'de> SeededVisitor<'a, 'de> {
119120

120121
/// Appends a cstring to the buffer. Returns an error if the given string contains a null byte.
121122
fn append_cstring(&mut self, key: &str) -> Result<(), String> {
122-
let key_bytes = key.as_bytes();
123-
if key_bytes.contains(&0) {
124-
return Err(format!("key contains interior null byte: {}", key));
125-
}
126-
127-
self.buffer.append_bytes(key_bytes);
128-
self.buffer.push_byte(0);
129-
130-
Ok(())
123+
write_cstring(self.buffer.get_owned_buffer(), key).map_err(|e| e.to_string())
131124
}
132125

133126
/// Appends a string and its length to the buffer.
134127
fn append_string(&mut self, s: &str) {
135-
let bytes = s.as_bytes();
136-
137-
// Add 1 to account for null byte.
138-
self.append_length_bytes((bytes.len() + 1) as i32);
139-
140-
self.buffer.append_bytes(bytes);
141-
self.buffer.push_byte(0);
128+
write_string(self.buffer.get_owned_buffer(), s)
142129
}
143130

144131
/// Converts the given length into little-endian bytes and appends the bytes to the buffer.

src/ser/mod.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,18 @@ use crate::{
4040
};
4141
use ::serde::{ser::Error as SerdeError, Serialize};
4242

43-
fn write_string<W: Write + ?Sized>(writer: &mut W, s: &str) -> Result<()> {
44-
writer.write_all(&(s.len() as i32 + 1).to_le_bytes())?;
45-
writer.write_all(s.as_bytes())?;
46-
writer.write_all(b"\0")?;
47-
Ok(())
43+
pub(crate) fn write_string(buf: &mut Vec<u8>, s: &str) {
44+
buf.extend(&(s.len() as i32 + 1).to_le_bytes());
45+
buf.extend(s.as_bytes());
46+
buf.push(0);
4847
}
4948

50-
fn write_cstring<W: Write + ?Sized>(writer: &mut W, s: &str) -> Result<()> {
49+
pub(crate) fn write_cstring(buf: &mut Vec<u8>, s: &str) -> Result<()> {
5150
if s.contains('\0') {
5251
return Err(Error::InvalidCString(s.into()));
5352
}
54-
writer.write_all(s.as_bytes())?;
55-
writer.write_all(b"\0")?;
53+
buf.extend(s.as_bytes());
54+
buf.push(0);
5655
Ok(())
5756
}
5857

src/ser/raw/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,8 @@ impl<'a> serde::Serializer for &'a mut Serializer {
199199
#[inline]
200200
fn serialize_str(self, v: &str) -> Result<Self::Ok> {
201201
self.update_element_type(ElementType::String)?;
202-
write_string(&mut self.bytes, v)
202+
write_string(&mut self.bytes, v);
203+
Ok(())
203204
}
204205

205206
#[inline]

src/ser/raw/value_serializer.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> {
265265
write_binary(&mut self.root_serializer.bytes, bytes.as_slice(), subtype)?;
266266
}
267267
SerializationStep::Symbol | SerializationStep::DbPointerRef => {
268-
write_string(&mut self.root_serializer.bytes, v)?;
268+
write_string(&mut self.root_serializer.bytes, v);
269269
}
270270
SerializationStep::RegExPattern => {
271271
write_cstring(&mut self.root_serializer.bytes, v)?;
@@ -278,7 +278,7 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> {
278278
write_cstring(&mut self.root_serializer.bytes, sorted.as_str())?;
279279
}
280280
SerializationStep::Code => {
281-
write_string(&mut self.root_serializer.bytes, v)?;
281+
write_string(&mut self.root_serializer.bytes, v);
282282
}
283283
SerializationStep::CodeWithScopeCode => {
284284
self.state = SerializationStep::CodeWithScopeScope {
@@ -313,7 +313,7 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> {
313313
scope: RawDocument::from_bytes(v).map_err(Error::custom)?,
314314
};
315315
write_i32(&mut self.root_serializer.bytes, raw.len())?;
316-
write_string(&mut self.root_serializer.bytes, code)?;
316+
write_string(&mut self.root_serializer.bytes, code);
317317
self.root_serializer.bytes.write_all(v)?;
318318
self.state = SerializationStep::Done;
319319
Ok(())
@@ -590,7 +590,7 @@ impl<'a> CodeWithScopeSerializer<'a> {
590590
fn start(code: &str, rs: &'a mut Serializer) -> Result<Self> {
591591
let start = rs.bytes.len();
592592
write_i32(&mut rs.bytes, 0)?; // placeholder length
593-
write_string(&mut rs.bytes, code)?;
593+
write_string(&mut rs.bytes, code);
594594

595595
let doc = DocumentSerializer::start(rs)?;
596596
Ok(Self { start, doc })

0 commit comments

Comments
 (0)