Skip to content

Commit d6e269e

Browse files
authored
RUST-648 Reintroduce Document decoding w/ lossy UTF-8 conversion (#232)
1 parent d8d8ebe commit d6e269e

File tree

2 files changed

+32
-3
lines changed

2 files changed

+32
-3
lines changed

src/document.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -524,8 +524,7 @@ impl Document {
524524
Ok(())
525525
}
526526

527-
/// Attempts to deserialize a `Document` from a byte stream.
528-
pub fn from_reader<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<Document> {
527+
fn decode<R: Read + ?Sized>(reader: &mut R, utf_lossy: bool) -> crate::de::Result<Document> {
529528
let mut doc = Document::new();
530529

531530
let length = read_i32(reader)?;
@@ -550,7 +549,7 @@ impl Document {
550549
break;
551550
}
552551

553-
let (key, val) = deserialize_bson_kvp(cursor, tag, false)?;
552+
let (key, val) = deserialize_bson_kvp(cursor, tag, utf_lossy)?;
554553
doc.insert(key, val);
555554
}
556555
Ok(())
@@ -559,6 +558,21 @@ impl Document {
559558

560559
Ok(doc)
561560
}
561+
562+
/// Attempts to deserialize a `Document` from a byte stream.
563+
pub fn from_reader<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<Document> {
564+
Self::decode(reader, false)
565+
}
566+
567+
/// Attempts to deserialize a `Document` that may contain invalid UTF-8 strings from a byte
568+
/// stream.
569+
///
570+
/// This is mainly useful when reading raw BSON returned from a MongoDB server, which
571+
/// in rare cases can contain invalidly truncated strings (<https://jira.mongodb.org/browse/SERVER-24007>).
572+
/// For most use cases, `Document::from_reader` can be used instead.
573+
pub fn from_reader_utf8_lossy<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<Document> {
574+
Self::decode(reader, true)
575+
}
562576
}
563577

564578
pub struct Entry<'a> {

src/tests/modules/serializer_deserializer.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,21 @@ fn test_serialize_deserialize_utf8_string() {
6464
assert_eq!(deserialized, doc);
6565
}
6666

67+
#[test]
68+
fn test_encode_decode_utf8_string_invalid() {
69+
let bytes = b"\x80\xae".to_vec();
70+
let src = unsafe { String::from_utf8_unchecked(bytes) };
71+
72+
let doc = doc! { "key": src };
73+
74+
let mut buf = Vec::new();
75+
doc.to_writer(&mut buf).unwrap();
76+
77+
let expected = doc! { "key": "��" };
78+
let decoded = Document::from_reader_utf8_lossy(&mut Cursor::new(buf)).unwrap();
79+
assert_eq!(decoded, expected);
80+
}
81+
6782
#[test]
6883
fn test_serialize_deserialize_array() {
6984
let _guard = LOCK.run_concurrently();

0 commit comments

Comments
 (0)