From 8906e84b1e924884a2de58d154d29d30a42a95e5 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Mon, 28 Oct 2024 14:38:50 -0400 Subject: [PATCH 1/9] PYTHON-4915 - Add guidance on adding _id fields to documents to CRUD spec --- test/mockupdb/test_id_ordering.py | 40 +++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 test/mockupdb/test_id_ordering.py diff --git a/test/mockupdb/test_id_ordering.py b/test/mockupdb/test_id_ordering.py new file mode 100644 index 0000000000..c06833fea0 --- /dev/null +++ b/test/mockupdb/test_id_ordering.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from test import PyMongoTestCase + +import pytest + +try: + from mockupdb import MockupDB, OpMsg, going + + _HAVE_MOCKUPDB = True +except ImportError: + _HAVE_MOCKUPDB = False + + +from bson.objectid import ObjectId + +pytestmark = pytest.mark.mockupdb + + +class TestIdOrdering(PyMongoTestCase): + def test_id_ordering(self): + server = MockupDB() + server.autoresponds( + "hello", + isWritablePrimary=True, + msg="isdbgrid", + minWireVersion=0, + maxWireVersion=20, + helloOk=True, + serviceId=ObjectId(), + ) + server.run() + self.addCleanup(server.stop) + + client = self.simple_client(server.uri, loadBalanced=True) + collection = client.db.coll + with going(collection.insert_one, {"x": 1}): + request = server.receives(OpMsg({"insert": "coll"})) + self.assertEqual("_id", next(iter(request["documents"][0]))) + request.reply({"ok": 1}) From 425cd1b30377d3a9dc9cf245f86087c12cedf62d Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 29 Oct 2024 10:21:28 -0400 Subject: [PATCH 2/9] Reorder client.bulkWrite _id --- pymongo/asynchronous/client_bulk.py | 5 ++++- pymongo/synchronous/client_bulk.py | 5 ++++- test/mockupdb/test_id_ordering.py | 18 +++++++++++++++--- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/pymongo/asynchronous/client_bulk.py b/pymongo/asynchronous/client_bulk.py index 96571c21eb..055f698ef6 100644 --- a/pymongo/asynchronous/client_bulk.py +++ b/pymongo/asynchronous/client_bulk.py @@ -133,7 +133,10 @@ def add_insert(self, namespace: str, document: _DocumentOut) -> None: validate_is_document_type("document", document) # Generate ObjectId client side. if not (isinstance(document, RawBSONDocument) or "_id" in document): - document["_id"] = ObjectId() + new_document = {"_id": ObjectId()} + new_document.update(document) + document.clear() + document.update(new_document) cmd = {"insert": -1, "document": document} self.ops.append(("insert", cmd)) self.namespaces.append(namespace) diff --git a/pymongo/synchronous/client_bulk.py b/pymongo/synchronous/client_bulk.py index 2c38b1d76c..88bb1f4d31 100644 --- a/pymongo/synchronous/client_bulk.py +++ b/pymongo/synchronous/client_bulk.py @@ -133,7 +133,10 @@ def add_insert(self, namespace: str, document: _DocumentOut) -> None: validate_is_document_type("document", document) # Generate ObjectId client side. if not (isinstance(document, RawBSONDocument) or "_id" in document): - document["_id"] = ObjectId() + new_document = {"_id": ObjectId()} + new_document.update(document) + document.clear() + document.update(new_document) cmd = {"insert": -1, "document": document} self.ops.append(("insert", cmd)) self.namespaces.append(namespace) diff --git a/test/mockupdb/test_id_ordering.py b/test/mockupdb/test_id_ordering.py index c06833fea0..66da3e2c1a 100644 --- a/test/mockupdb/test_id_ordering.py +++ b/test/mockupdb/test_id_ordering.py @@ -4,8 +4,10 @@ import pytest +from pymongo import InsertOne + try: - from mockupdb import MockupDB, OpMsg, going + from mockupdb import MockupDB, OpMsg, go, going _HAVE_MOCKUPDB = True except ImportError: @@ -25,7 +27,7 @@ def test_id_ordering(self): isWritablePrimary=True, msg="isdbgrid", minWireVersion=0, - maxWireVersion=20, + maxWireVersion=25, helloOk=True, serviceId=ObjectId(), ) @@ -35,6 +37,16 @@ def test_id_ordering(self): client = self.simple_client(server.uri, loadBalanced=True) collection = client.db.coll with going(collection.insert_one, {"x": 1}): - request = server.receives(OpMsg({"insert": "coll"})) + request = server.receives() + self.assertEqual("_id", next(iter(request["documents"][0]))) + request.reply({"ok": 1}) + + with going(collection.bulk_write, [InsertOne({"x1": 1})]): + request = server.receives() self.assertEqual("_id", next(iter(request["documents"][0]))) request.reply({"ok": 1}) + + with going(client.bulk_write, [InsertOne(namespace="db.coll", document={"x2": 1})]): + request = server.receives() + self.assertEqual("_id", next(iter(request["ops"][0]["document"]))) + request.reply({"ok": 1}) From 1b3df52401b0c757c26993a9b3befd61bc522c97 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Wed, 30 Oct 2024 09:52:49 -0400 Subject: [PATCH 3/9] Use ChainMap --- pymongo/asynchronous/client_bulk.py | 8 ++++---- pymongo/synchronous/client_bulk.py | 8 ++++---- test/mockupdb/test_id_ordering.py | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/pymongo/asynchronous/client_bulk.py b/pymongo/asynchronous/client_bulk.py index 055f698ef6..0cfe510d4e 100644 --- a/pymongo/asynchronous/client_bulk.py +++ b/pymongo/asynchronous/client_bulk.py @@ -21,6 +21,7 @@ import copy import datetime import logging +from collections import ChainMap from collections.abc import MutableMapping from itertools import islice from typing import ( @@ -133,10 +134,9 @@ def add_insert(self, namespace: str, document: _DocumentOut) -> None: validate_is_document_type("document", document) # Generate ObjectId client side. if not (isinstance(document, RawBSONDocument) or "_id" in document): - new_document = {"_id": ObjectId()} - new_document.update(document) - document.clear() - document.update(new_document) + document = ChainMap(document, {"_id": ObjectId()}) + elif not isinstance(document, RawBSONDocument) and "_id" in document: + document = ChainMap(document, {"_id": document["_id"]}) cmd = {"insert": -1, "document": document} self.ops.append(("insert", cmd)) self.namespaces.append(namespace) diff --git a/pymongo/synchronous/client_bulk.py b/pymongo/synchronous/client_bulk.py index 88bb1f4d31..282f61dc15 100644 --- a/pymongo/synchronous/client_bulk.py +++ b/pymongo/synchronous/client_bulk.py @@ -21,6 +21,7 @@ import copy import datetime import logging +from collections import ChainMap from collections.abc import MutableMapping from itertools import islice from typing import ( @@ -133,10 +134,9 @@ def add_insert(self, namespace: str, document: _DocumentOut) -> None: validate_is_document_type("document", document) # Generate ObjectId client side. if not (isinstance(document, RawBSONDocument) or "_id" in document): - new_document = {"_id": ObjectId()} - new_document.update(document) - document.clear() - document.update(new_document) + document = ChainMap(document, {"_id": ObjectId()}) + elif not isinstance(document, RawBSONDocument) and "_id" in document: + document = ChainMap(document, {"_id": document["_id"]}) cmd = {"insert": -1, "document": document} self.ops.append(("insert", cmd)) self.namespaces.append(namespace) diff --git a/test/mockupdb/test_id_ordering.py b/test/mockupdb/test_id_ordering.py index 66da3e2c1a..50349ab3aa 100644 --- a/test/mockupdb/test_id_ordering.py +++ b/test/mockupdb/test_id_ordering.py @@ -50,3 +50,21 @@ def test_id_ordering(self): request = server.receives() self.assertEqual("_id", next(iter(request["ops"][0]["document"]))) request.reply({"ok": 1}) + + # Re-ordering user-supplied _id fields is not required by the spec, but PyMongo does it for performance reasons + with going(collection.insert_one, {"x": 1, "_id": 111}): + request = server.receives() + self.assertEqual("_id", next(iter(request["documents"][0]))) + request.reply({"ok": 1}) + + with going(collection.bulk_write, [InsertOne({"x1": 1, "_id": 1111})]): + request = server.receives() + self.assertEqual("_id", next(iter(request["documents"][0]))) + request.reply({"ok": 1}) + + with going( + client.bulk_write, [InsertOne(namespace="db.coll", document={"x2": 1, "_id": 11111})] + ): + request = server.receives() + self.assertEqual("_id", next(iter(request["ops"][0]["document"]))) + request.reply({"ok": 1}) From 36187bb8ae4f32537797d1e5d6c80ad2d21fcb83 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Wed, 30 Oct 2024 09:53:41 -0400 Subject: [PATCH 4/9] Add license --- test/mockupdb/test_id_ordering.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/mockupdb/test_id_ordering.py b/test/mockupdb/test_id_ordering.py index 50349ab3aa..8dfc7d9746 100644 --- a/test/mockupdb/test_id_ordering.py +++ b/test/mockupdb/test_id_ordering.py @@ -1,3 +1,17 @@ +# Copyright 2024-present MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import annotations from test import PyMongoTestCase From 0e07e184be83916fd4dfad17867d86e3610cc216 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Wed, 30 Oct 2024 17:09:01 -0400 Subject: [PATCH 5/9] Unwrap ChainMaps during logging --- pymongo/_client_bulk_shared.py | 6 +++++- pymongo/asynchronous/client_bulk.py | 14 ++++++++++---- pymongo/monitoring.py | 7 ++++++- pymongo/synchronous/client_bulk.py | 14 ++++++++++---- test/mockupdb/test_id_ordering.py | 15 ++++++++++++--- 5 files changed, 43 insertions(+), 13 deletions(-) diff --git a/pymongo/_client_bulk_shared.py b/pymongo/_client_bulk_shared.py index 649f1c6aa0..5722bdbe03 100644 --- a/pymongo/_client_bulk_shared.py +++ b/pymongo/_client_bulk_shared.py @@ -16,7 +16,7 @@ """Constants, types, and classes shared across Client Bulk Write API implementations.""" from __future__ import annotations -from typing import TYPE_CHECKING, Any, Mapping, MutableMapping, NoReturn +from typing import TYPE_CHECKING, Any, ChainMap, Mapping, MutableMapping, NoReturn from pymongo.errors import ClientBulkWriteException, OperationFailure from pymongo.helpers_shared import _get_wce_doc @@ -63,6 +63,10 @@ def _throw_client_bulk_write_exception( """Raise a ClientBulkWriteException from the full result.""" # retryWrites on MMAPv1 should raise an actionable error. if full_result["writeErrors"]: + # Unpack ChainMaps into the original document only + for doc in full_result["writeErrors"]: + if "document" in doc["op"] and isinstance(doc["op"]["document"], ChainMap): + doc["op"]["document"] = doc["op"]["document"].maps[0] full_result["writeErrors"].sort(key=lambda error: error["idx"]) err = full_result["writeErrors"][0] code = err["code"] diff --git a/pymongo/asynchronous/client_bulk.py b/pymongo/asynchronous/client_bulk.py index 0cfe510d4e..63d4936589 100644 --- a/pymongo/asynchronous/client_bulk.py +++ b/pymongo/asynchronous/client_bulk.py @@ -133,10 +133,16 @@ def add_insert(self, namespace: str, document: _DocumentOut) -> None: """Add an insert document to the list of ops.""" validate_is_document_type("document", document) # Generate ObjectId client side. - if not (isinstance(document, RawBSONDocument) or "_id" in document): - document = ChainMap(document, {"_id": ObjectId()}) - elif not isinstance(document, RawBSONDocument) and "_id" in document: - document = ChainMap(document, {"_id": document["_id"]}) + if not isinstance(document, RawBSONDocument): + # Since the data document itself is nested within the insert document + # it won't be automatically re-ordered by the BSON conversion. + # We use ChainMap here to make the _id field the first field instead. + if "_id" in document: + document = ChainMap(document, {"_id": document["_id"]}) + else: + id = ObjectId() + document["_id"] = id + document = ChainMap(document, {"_id": id}) cmd = {"insert": -1, "document": document} self.ops.append(("insert", cmd)) self.namespaces.append(namespace) diff --git a/pymongo/monitoring.py b/pymongo/monitoring.py index 96f88597d2..f7541218dd 100644 --- a/pymongo/monitoring.py +++ b/pymongo/monitoring.py @@ -190,7 +190,7 @@ def connection_checked_in(self, event): import datetime from collections import abc, namedtuple -from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence +from typing import TYPE_CHECKING, Any, ChainMap, Mapping, Optional, Sequence from bson.objectid import ObjectId from pymongo.hello import Hello, HelloCompat @@ -625,6 +625,11 @@ def __init__( raise ValueError(f"{command!r} is not a valid command") # Command name must be first key. command_name = next(iter(command)) + # Unpack ChainMaps into the original document only + if command_name == "bulkWrite" and "ops" in command: + for doc in command["ops"]: + if "document" in doc and isinstance(doc["document"], ChainMap): + doc["document"] = doc["document"].maps[0] super().__init__( command_name, request_id, diff --git a/pymongo/synchronous/client_bulk.py b/pymongo/synchronous/client_bulk.py index 282f61dc15..0e2f54e482 100644 --- a/pymongo/synchronous/client_bulk.py +++ b/pymongo/synchronous/client_bulk.py @@ -133,10 +133,16 @@ def add_insert(self, namespace: str, document: _DocumentOut) -> None: """Add an insert document to the list of ops.""" validate_is_document_type("document", document) # Generate ObjectId client side. - if not (isinstance(document, RawBSONDocument) or "_id" in document): - document = ChainMap(document, {"_id": ObjectId()}) - elif not isinstance(document, RawBSONDocument) and "_id" in document: - document = ChainMap(document, {"_id": document["_id"]}) + if not isinstance(document, RawBSONDocument): + # Since the data document itself is nested within the insert document + # it won't be automatically re-ordered by the BSON conversion. + # We use ChainMap here to make the _id field the first field instead. + if "_id" in document: + document = ChainMap(document, {"_id": document["_id"]}) + else: + id = ObjectId() + document["_id"] = id + document = ChainMap(document, {"_id": id}) cmd = {"insert": -1, "document": document} self.ops.append(("insert", cmd)) self.namespaces.append(namespace) diff --git a/test/mockupdb/test_id_ordering.py b/test/mockupdb/test_id_ordering.py index 8dfc7d9746..3facf6f664 100644 --- a/test/mockupdb/test_id_ordering.py +++ b/test/mockupdb/test_id_ordering.py @@ -48,22 +48,31 @@ def test_id_ordering(self): server.run() self.addCleanup(server.stop) + # We also verify that the original document contains an _id field after each insert + document = {"x": 1} + client = self.simple_client(server.uri, loadBalanced=True) collection = client.db.coll - with going(collection.insert_one, {"x": 1}): + with going(collection.insert_one, document): request = server.receives() self.assertEqual("_id", next(iter(request["documents"][0]))) request.reply({"ok": 1}) + self.assertIn("_id", document) + + document = {"x1": 1} - with going(collection.bulk_write, [InsertOne({"x1": 1})]): + with going(collection.bulk_write, [InsertOne(document)]): request = server.receives() self.assertEqual("_id", next(iter(request["documents"][0]))) request.reply({"ok": 1}) + self.assertIn("_id", document) - with going(client.bulk_write, [InsertOne(namespace="db.coll", document={"x2": 1})]): + document = {"x2": 1} + with going(client.bulk_write, [InsertOne(namespace="db.coll", document=document)]): request = server.receives() self.assertEqual("_id", next(iter(request["ops"][0]["document"]))) request.reply({"ok": 1}) + self.assertIn("_id", document) # Re-ordering user-supplied _id fields is not required by the spec, but PyMongo does it for performance reasons with going(collection.insert_one, {"x": 1, "_id": 111}): From 13568b1c0c09302751398e9d43a3ad24892968b4 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 31 Oct 2024 09:45:01 -0400 Subject: [PATCH 6/9] Move re-order logic for client bulkWrite into _client_batched_op_msg_impl --- pymongo/_client_bulk_shared.py | 3 ++- pymongo/asynchronous/client_bulk.py | 13 ++----------- pymongo/message.py | 7 +++++++ pymongo/monitoring.py | 4 ++-- pymongo/synchronous/client_bulk.py | 13 ++----------- 5 files changed, 15 insertions(+), 25 deletions(-) diff --git a/pymongo/_client_bulk_shared.py b/pymongo/_client_bulk_shared.py index 5722bdbe03..6bf86a61c0 100644 --- a/pymongo/_client_bulk_shared.py +++ b/pymongo/_client_bulk_shared.py @@ -16,7 +16,8 @@ """Constants, types, and classes shared across Client Bulk Write API implementations.""" from __future__ import annotations -from typing import TYPE_CHECKING, Any, ChainMap, Mapping, MutableMapping, NoReturn +from collections import ChainMap +from typing import TYPE_CHECKING, Any, Mapping, MutableMapping, NoReturn from pymongo.errors import ClientBulkWriteException, OperationFailure from pymongo.helpers_shared import _get_wce_doc diff --git a/pymongo/asynchronous/client_bulk.py b/pymongo/asynchronous/client_bulk.py index 63d4936589..96571c21eb 100644 --- a/pymongo/asynchronous/client_bulk.py +++ b/pymongo/asynchronous/client_bulk.py @@ -21,7 +21,6 @@ import copy import datetime import logging -from collections import ChainMap from collections.abc import MutableMapping from itertools import islice from typing import ( @@ -133,16 +132,8 @@ def add_insert(self, namespace: str, document: _DocumentOut) -> None: """Add an insert document to the list of ops.""" validate_is_document_type("document", document) # Generate ObjectId client side. - if not isinstance(document, RawBSONDocument): - # Since the data document itself is nested within the insert document - # it won't be automatically re-ordered by the BSON conversion. - # We use ChainMap here to make the _id field the first field instead. - if "_id" in document: - document = ChainMap(document, {"_id": document["_id"]}) - else: - id = ObjectId() - document["_id"] = id - document = ChainMap(document, {"_id": id}) + if not (isinstance(document, RawBSONDocument) or "_id" in document): + document["_id"] = ObjectId() cmd = {"insert": -1, "document": document} self.ops.append(("insert", cmd)) self.namespaces.append(namespace) diff --git a/pymongo/message.py b/pymongo/message.py index de77ccd382..3435bab335 100644 --- a/pymongo/message.py +++ b/pymongo/message.py @@ -24,6 +24,7 @@ import datetime import random import struct +from collections import ChainMap from io import BytesIO as _BytesIO from typing import ( TYPE_CHECKING, @@ -1111,6 +1112,12 @@ def _check_doc_size_limits( # key and the index of its namespace within ns_info as its value. op_doc[op_type] = ns_info[namespace] # type: ignore[index] + # Since the data document itself is nested within the insert document + # it won't be automatically re-ordered by the BSON conversion. + # We use ChainMap here to make the _id field the first field instead. + if real_op_type == "insert": + op_doc["document"] = ChainMap(op_doc["document"], {"_id": op_doc["document"]["_id"]}) # type: ignore[index] + # Encode current operation doc and, if newly added, namespace doc. op_doc_encoded = _dict_to_bson(op_doc, False, opts) op_length = len(op_doc_encoded) diff --git a/pymongo/monitoring.py b/pymongo/monitoring.py index f7541218dd..c3b7643a3a 100644 --- a/pymongo/monitoring.py +++ b/pymongo/monitoring.py @@ -189,8 +189,8 @@ def connection_checked_in(self, event): from __future__ import annotations import datetime -from collections import abc, namedtuple -from typing import TYPE_CHECKING, Any, ChainMap, Mapping, Optional, Sequence +from collections import ChainMap, abc, namedtuple +from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence from bson.objectid import ObjectId from pymongo.hello import Hello, HelloCompat diff --git a/pymongo/synchronous/client_bulk.py b/pymongo/synchronous/client_bulk.py index 0e2f54e482..2c38b1d76c 100644 --- a/pymongo/synchronous/client_bulk.py +++ b/pymongo/synchronous/client_bulk.py @@ -21,7 +21,6 @@ import copy import datetime import logging -from collections import ChainMap from collections.abc import MutableMapping from itertools import islice from typing import ( @@ -133,16 +132,8 @@ def add_insert(self, namespace: str, document: _DocumentOut) -> None: """Add an insert document to the list of ops.""" validate_is_document_type("document", document) # Generate ObjectId client side. - if not isinstance(document, RawBSONDocument): - # Since the data document itself is nested within the insert document - # it won't be automatically re-ordered by the BSON conversion. - # We use ChainMap here to make the _id field the first field instead. - if "_id" in document: - document = ChainMap(document, {"_id": document["_id"]}) - else: - id = ObjectId() - document["_id"] = id - document = ChainMap(document, {"_id": id}) + if not (isinstance(document, RawBSONDocument) or "_id" in document): + document["_id"] = ObjectId() cmd = {"insert": -1, "document": document} self.ops.append(("insert", cmd)) self.namespaces.append(namespace) From da83afc15e639d983866e42de720ed71ca6d4a98 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 31 Oct 2024 16:43:25 -0400 Subject: [PATCH 7/9] Isolate ChainMap + add RawBSONDocument not inflated test --- pymongo/_client_bulk_shared.py | 5 ----- pymongo/message.py | 8 ++++++-- pymongo/monitoring.py | 7 +------ test/asynchronous/test_client_bulk_write.py | 13 +++++++++++++ test/test_client_bulk_write.py | 13 +++++++++++++ 5 files changed, 33 insertions(+), 13 deletions(-) diff --git a/pymongo/_client_bulk_shared.py b/pymongo/_client_bulk_shared.py index 6bf86a61c0..649f1c6aa0 100644 --- a/pymongo/_client_bulk_shared.py +++ b/pymongo/_client_bulk_shared.py @@ -16,7 +16,6 @@ """Constants, types, and classes shared across Client Bulk Write API implementations.""" from __future__ import annotations -from collections import ChainMap from typing import TYPE_CHECKING, Any, Mapping, MutableMapping, NoReturn from pymongo.errors import ClientBulkWriteException, OperationFailure @@ -64,10 +63,6 @@ def _throw_client_bulk_write_exception( """Raise a ClientBulkWriteException from the full result.""" # retryWrites on MMAPv1 should raise an actionable error. if full_result["writeErrors"]: - # Unpack ChainMaps into the original document only - for doc in full_result["writeErrors"]: - if "document" in doc["op"] and isinstance(doc["op"]["document"], ChainMap): - doc["op"]["document"] = doc["op"]["document"].maps[0] full_result["writeErrors"].sort(key=lambda error: error["idx"]) err = full_result["writeErrors"][0] code = err["code"] diff --git a/pymongo/message.py b/pymongo/message.py index 3435bab335..2ec84fd933 100644 --- a/pymongo/message.py +++ b/pymongo/message.py @@ -1115,11 +1115,15 @@ def _check_doc_size_limits( # Since the data document itself is nested within the insert document # it won't be automatically re-ordered by the BSON conversion. # We use ChainMap here to make the _id field the first field instead. + doc_to_encode = op_doc if real_op_type == "insert": - op_doc["document"] = ChainMap(op_doc["document"], {"_id": op_doc["document"]["_id"]}) # type: ignore[index] + doc = op_doc["document"] + if not isinstance(doc, RawBSONDocument): + doc_to_encode = op_doc.copy() # Shallow copy + doc_to_encode["document"] = ChainMap(doc, {"_id": doc["_id"]}) # type: ignore[index] # Encode current operation doc and, if newly added, namespace doc. - op_doc_encoded = _dict_to_bson(op_doc, False, opts) + op_doc_encoded = _dict_to_bson(doc_to_encode, False, opts) op_length = len(op_doc_encoded) if ns_doc: ns_doc_encoded = _dict_to_bson(ns_doc, False, opts) diff --git a/pymongo/monitoring.py b/pymongo/monitoring.py index c3b7643a3a..96f88597d2 100644 --- a/pymongo/monitoring.py +++ b/pymongo/monitoring.py @@ -189,7 +189,7 @@ def connection_checked_in(self, event): from __future__ import annotations import datetime -from collections import ChainMap, abc, namedtuple +from collections import abc, namedtuple from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence from bson.objectid import ObjectId @@ -625,11 +625,6 @@ def __init__( raise ValueError(f"{command!r} is not a valid command") # Command name must be first key. command_name = next(iter(command)) - # Unpack ChainMaps into the original document only - if command_name == "bulkWrite" and "ops" in command: - for doc in command["ops"]: - if "document" in doc and isinstance(doc["document"], ChainMap): - doc["document"] = doc["document"].maps[0] super().__init__( command_name, request_id, diff --git a/test/asynchronous/test_client_bulk_write.py b/test/asynchronous/test_client_bulk_write.py index 9464337809..2fcf014a85 100644 --- a/test/asynchronous/test_client_bulk_write.py +++ b/test/asynchronous/test_client_bulk_write.py @@ -18,6 +18,8 @@ import os import sys +from bson import encode + sys.path[0:0] = [""] from test.asynchronous import ( @@ -84,6 +86,17 @@ async def test_formats_write_error_correctly(self): self.assertEqual(write_error["idx"], 1) self.assertEqual(write_error["op"], {"insert": 0, "document": {"_id": 1}}) + @async_client_context.require_version_min(8, 0, 0, -24) + @async_client_context.require_no_serverless + async def test_raw_bson_not_inflated(self): + doc = RawBSONDocument(encode({"a": "b" * 100})) + models = [ + InsertOne(namespace="db.coll", document=doc), + ] + await self.client.bulk_write(models=models) + + self.assertIsNone(doc._RawBSONDocument__inflated_doc) + # https://github.com/mongodb/specifications/tree/master/source/crud/tests class TestClientBulkWriteCRUD(AsyncIntegrationTest): diff --git a/test/test_client_bulk_write.py b/test/test_client_bulk_write.py index 58b5015dd2..e5df9613bb 100644 --- a/test/test_client_bulk_write.py +++ b/test/test_client_bulk_write.py @@ -18,6 +18,8 @@ import os import sys +from bson import encode + sys.path[0:0] = [""] from test import ( @@ -84,6 +86,17 @@ def test_formats_write_error_correctly(self): self.assertEqual(write_error["idx"], 1) self.assertEqual(write_error["op"], {"insert": 0, "document": {"_id": 1}}) + @client_context.require_version_min(8, 0, 0, -24) + @client_context.require_no_serverless + def test_raw_bson_not_inflated(self): + doc = RawBSONDocument(encode({"a": "b" * 100})) + models = [ + InsertOne(namespace="db.coll", document=doc), + ] + self.client.bulk_write(models=models) + + self.assertIsNone(doc._RawBSONDocument__inflated_doc) + # https://github.com/mongodb/specifications/tree/master/source/crud/tests class TestClientBulkWriteCRUD(IntegrationTest): From 8894f238283378f2105d5cbb0bc68d9cb3b0f776 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 31 Oct 2024 16:47:48 -0400 Subject: [PATCH 8/9] typing fixes --- pymongo/message.py | 2 +- test/asynchronous/test_client_bulk_write.py | 2 +- test/test_client_bulk_write.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pymongo/message.py b/pymongo/message.py index 2ec84fd933..3e2ae00ae7 100644 --- a/pymongo/message.py +++ b/pymongo/message.py @@ -1119,7 +1119,7 @@ def _check_doc_size_limits( if real_op_type == "insert": doc = op_doc["document"] if not isinstance(doc, RawBSONDocument): - doc_to_encode = op_doc.copy() # Shallow copy + doc_to_encode = op_doc.copy() # type: ignore[attr-defined] # Shallow copy doc_to_encode["document"] = ChainMap(doc, {"_id": doc["_id"]}) # type: ignore[index] # Encode current operation doc and, if newly added, namespace doc. diff --git a/test/asynchronous/test_client_bulk_write.py b/test/asynchronous/test_client_bulk_write.py index 2fcf014a85..af21ebe8ea 100644 --- a/test/asynchronous/test_client_bulk_write.py +++ b/test/asynchronous/test_client_bulk_write.py @@ -18,7 +18,7 @@ import os import sys -from bson import encode +from bson import RawBSONDocument, encode sys.path[0:0] = [""] diff --git a/test/test_client_bulk_write.py b/test/test_client_bulk_write.py index e5df9613bb..f3ecd3c041 100644 --- a/test/test_client_bulk_write.py +++ b/test/test_client_bulk_write.py @@ -18,7 +18,7 @@ import os import sys -from bson import encode +from bson import RawBSONDocument, encode sys.path[0:0] = [""] From b2dede309cdc7b82782f89736769bf6aaa1c416b Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Fri, 1 Nov 2024 09:17:10 -0400 Subject: [PATCH 9/9] Cleanup --- test/asynchronous/test_client_bulk_write.py | 3 ++- test/mockupdb/test_id_ordering.py | 3 ++- test/test_client_bulk_write.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/test/asynchronous/test_client_bulk_write.py b/test/asynchronous/test_client_bulk_write.py index af21ebe8ea..78b524b718 100644 --- a/test/asynchronous/test_client_bulk_write.py +++ b/test/asynchronous/test_client_bulk_write.py @@ -18,7 +18,8 @@ import os import sys -from bson import RawBSONDocument, encode +from bson import encode +from bson.raw_bson import RawBSONDocument sys.path[0:0] = [""] diff --git a/test/mockupdb/test_id_ordering.py b/test/mockupdb/test_id_ordering.py index 3facf6f664..7e2c91d592 100644 --- a/test/mockupdb/test_id_ordering.py +++ b/test/mockupdb/test_id_ordering.py @@ -33,8 +33,9 @@ pytestmark = pytest.mark.mockupdb +# https://github.com/mongodb/specifications/blob/master/source/crud/tests/README.md#16-generated-document-identifiers-are-the-first-field-in-their-document class TestIdOrdering(PyMongoTestCase): - def test_id_ordering(self): + def test_16_generated_document_ids_are_first_field(self): server = MockupDB() server.autoresponds( "hello", diff --git a/test/test_client_bulk_write.py b/test/test_client_bulk_write.py index f3ecd3c041..d98d37a656 100644 --- a/test/test_client_bulk_write.py +++ b/test/test_client_bulk_write.py @@ -18,7 +18,8 @@ import os import sys -from bson import RawBSONDocument, encode +from bson import encode +from bson.raw_bson import RawBSONDocument sys.path[0:0] = [""]