Skip to content

Commit 0a50512

Browse files
authored
RUST-886 Use lossy UTF-8 decoding for responses to insert and update commands (#601)
1 parent ec66dcf commit 0a50512

File tree

4 files changed

+115
-2
lines changed

4 files changed

+115
-2
lines changed

src/cmap/conn/command.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,16 @@ impl RawCommandResponse {
217217
})
218218
}
219219

220+
/// Used to handle decoding responses where the server may return invalid UTF-8 in error
221+
/// messages.
222+
pub(crate) fn body_utf8_lossy<'a, T: Deserialize<'a>>(&'a self) -> Result<T> {
223+
bson::from_slice_utf8_lossy(self.raw.as_bytes()).map_err(|e| {
224+
Error::from(ErrorKind::InvalidResponse {
225+
message: format!("{}", e),
226+
})
227+
})
228+
}
229+
220230
pub(crate) fn raw_body(&self) -> &RawDocument {
221231
&self.raw
222232
}

src/operation/insert/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ impl<'a, T: Serialize> Operation for Insert<'a, T> {
135135
raw_response: RawCommandResponse,
136136
_description: &StreamDescription,
137137
) -> Result<Self::O> {
138-
let response: WriteResponseBody = raw_response.body()?;
138+
let response: WriteResponseBody = raw_response.body_utf8_lossy()?;
139139

140140
let mut map = HashMap::new();
141141
if self.is_ordered() {

src/operation/update/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ impl Operation for Update {
118118
raw_response: RawCommandResponse,
119119
_description: &StreamDescription,
120120
) -> Result<Self::O> {
121-
let response: WriteResponseBody<UpdateBody> = raw_response.body()?;
121+
let response: WriteResponseBody<UpdateBody> = raw_response.body_utf8_lossy()?;
122122
response.validate().map_err(convert_bulk_errors)?;
123123

124124
let modified_count = response.n_modified;

src/test/coll.rs

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use crate::{
1919
FindOneOptions,
2020
FindOptions,
2121
Hint,
22+
IndexOptions,
2223
InsertManyOptions,
2324
ReadConcern,
2425
ReadPreference,
@@ -35,6 +36,7 @@ use crate::{
3536
LOCK,
3637
},
3738
Collection,
39+
IndexModel,
3840
};
3941

4042
#[cfg_attr(feature = "tokio-runtime", tokio::test)]
@@ -1098,3 +1100,104 @@ async fn cursor_batch_size() {
10981100
let docs: Vec<_> = cursor.stream(&mut session).try_collect().await.unwrap();
10991101
assert_eq!(docs.len(), 10);
11001102
}
1103+
1104+
/// Test that the driver gracefully handles cases where the server returns invalid UTF-8 in error
1105+
/// messages. See SERVER-24007 and related tickets for details.
1106+
#[cfg_attr(feature = "tokio-runtime", tokio::test)]
1107+
#[cfg_attr(feature = "async-std-runtime", async_std::test)]
1108+
async fn invalid_utf8_response() {
1109+
let _guard: RwLockReadGuard<()> = LOCK.run_concurrently().await;
1110+
1111+
let client = TestClient::new().await;
1112+
let coll = client
1113+
.init_db_and_coll("invalid_uft8_handling", "invalid_uft8_handling")
1114+
.await;
1115+
1116+
let index_model = IndexModel::builder()
1117+
.keys(doc! {"name": 1})
1118+
.options(IndexOptions::builder().unique(true).build())
1119+
.build();
1120+
coll.create_index(index_model, None)
1121+
.await
1122+
.expect("creating an index should succeed");
1123+
1124+
// a document containing a long string with multi-byte unicode characters. taken from a user
1125+
// repro in RUBY-2560.
1126+
let long_unicode_str_doc = doc! {"name": "(╯°□°)╯︵ ┻━┻(╯°□°)╯︵ ┻━┻(╯°□°)╯︵ ┻━┻(╯°□°)╯︵ ┻━┻(╯°□°)╯︵ ┻━┻(╯°□°)╯︵ ┻━┻"};
1127+
coll.insert_one(&long_unicode_str_doc, None)
1128+
.await
1129+
.expect("first insert of document should succeed");
1130+
1131+
// test triggering an invalid error message via an insert_one.
1132+
let insert_err = coll
1133+
.insert_one(&long_unicode_str_doc, None)
1134+
.await
1135+
.expect_err("second insert of document should fail")
1136+
.kind;
1137+
assert_duplicate_key_error_with_utf8_replacement(&insert_err);
1138+
1139+
// test triggering an invalid error message via an insert_many.
1140+
let insert_err = coll
1141+
.insert_many([&long_unicode_str_doc], None)
1142+
.await
1143+
.expect_err("second insert of document should fail")
1144+
.kind;
1145+
assert_duplicate_key_error_with_utf8_replacement(&insert_err);
1146+
1147+
// test triggering an invalid error message via an update_one.
1148+
coll.insert_one(doc! {"x": 1}, None)
1149+
.await
1150+
.expect("inserting new document should succeed");
1151+
1152+
let update_err = coll
1153+
.update_one(doc! {"x": 1}, doc! {"$set": &long_unicode_str_doc}, None)
1154+
.await
1155+
.expect_err("update setting duplicate key should fail")
1156+
.kind;
1157+
assert_duplicate_key_error_with_utf8_replacement(&update_err);
1158+
1159+
// test triggering an invalid error message via an update_many.
1160+
let update_err = coll
1161+
.update_many(doc! {"x": 1}, doc! {"$set": &long_unicode_str_doc}, None)
1162+
.await
1163+
.expect_err("update setting duplicate key should fail")
1164+
.kind;
1165+
assert_duplicate_key_error_with_utf8_replacement(&update_err);
1166+
1167+
// test triggering an invalid error message via a replace_one.
1168+
let replace_err = coll
1169+
.replace_one(doc! {"x": 1}, &long_unicode_str_doc, None)
1170+
.await
1171+
.expect_err("replacement with duplicate key should fail")
1172+
.kind;
1173+
assert_duplicate_key_error_with_utf8_replacement(&replace_err);
1174+
}
1175+
1176+
/// Check that we successfully decoded a duplicate key error and that the error message contains the
1177+
/// unicode replacement character, meaning we gracefully handled the invalid UTF-8.
1178+
fn assert_duplicate_key_error_with_utf8_replacement(error: &ErrorKind) {
1179+
match error {
1180+
ErrorKind::Write(ref failure) => match failure {
1181+
WriteFailure::WriteError(err) => {
1182+
assert_eq!(err.code, 11000);
1183+
assert!(err.message.contains('�'));
1184+
}
1185+
e => panic!("expected WriteFailure::WriteError, got {:?} instead", e),
1186+
},
1187+
ErrorKind::BulkWrite(ref failure) => match &failure.write_errors {
1188+
Some(write_errors) => {
1189+
assert_eq!(write_errors.len(), 1);
1190+
assert_eq!(write_errors[0].code, 11000);
1191+
assert!(write_errors[0].message.contains('�'));
1192+
}
1193+
None => panic!(
1194+
"expected BulkWriteFailure containing write errors, got {:?} instead",
1195+
failure
1196+
),
1197+
},
1198+
e => panic!(
1199+
"expected ErrorKind::Write or ErrorKind::BulkWrite, got {:?} instead",
1200+
e
1201+
),
1202+
}
1203+
}

0 commit comments

Comments
 (0)