mongodb · NoahStapp · Nov 12, 2024 · Oct 28, 2024 · Oct 29, 2024 · Oct 30, 2024
diff --git a/pymongo/_client_bulk_shared.py b/pymongo/_client_bulk_shared.py
@@ -16,6 +16,7 @@
 """Constants, types, and classes shared across Client Bulk Write API implementations."""
 from __future__ import annotations
 
+from collections import ChainMap
 from typing import TYPE_CHECKING, Any, Mapping, MutableMapping, NoReturn
 
 from pymongo.errors import ClientBulkWriteException, OperationFailure
@@ -63,6 +64,10 @@ def _throw_client_bulk_write_exception(
     """Raise a ClientBulkWriteException from the full result."""
     # retryWrites on MMAPv1 should raise an actionable error.
     if full_result["writeErrors"]:
+        # Swap each ChainMap wrapper back to the original document it wraps (maps[0]).
+        for doc in full_result["writeErrors"]:
+            if "document" in doc["op"] and isinstance(doc["op"]["document"], ChainMap):
+                doc["op"]["document"] = doc["op"]["document"].maps[0]
         full_result["writeErrors"].sort(key=lambda error: error["idx"])
         err = full_result["writeErrors"][0]
         code = err["code"]

@@ -24,6 +24,7 @@
 import datetime
 import random
 import struct
+from collections import ChainMap
 from io import BytesIO as _BytesIO
 from typing import (
     TYPE_CHECKING,
@@ -1111,6 +1112,12 @@ def _check_doc_size_limits(
         # key and the index of its namespace within ns_info as its value.
         op_doc[op_type] = ns_info[namespace]  # type: ignore[index]
 
+        # Since the data document itself is nested within the insert document
+        # it won't be automatically re-ordered by the BSON conversion.
+        # We use ChainMap here to make the _id field the first field instead.
+        if real_op_type == "insert":
+            op_doc["document"] = ChainMap(op_doc["document"], {"_id": op_doc["document"]["_id"]})  # type: ignore[index]
+
         # Encode current operation doc and, if newly added, namespace doc.
         op_doc_encoded = _dict_to_bson(op_doc, False, opts)
         op_length = len(op_doc_encoded)

diff --git a/pymongo/monitoring.py b/pymongo/monitoring.py
@@ -189,7 +189,7 @@ def connection_checked_in(self, event):
 from __future__ import annotations
 
 import datetime
-from collections import abc, namedtuple
+from collections import ChainMap, abc, namedtuple
 from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence
 
 from bson.objectid import ObjectId
@@ -625,6 +625,11 @@ def __init__(
             raise ValueError(f"{command!r} is not a valid command")
         # Command name must be first key.
         command_name = next(iter(command))
+        # Swap each ChainMap wrapper back to the original document it wraps (maps[0]).
+        if command_name == "bulkWrite" and "ops" in command:
+            for doc in command["ops"]:
+                if "document" in doc and isinstance(doc["document"], ChainMap):
+                    doc["document"] = doc["document"].maps[0]
         super().__init__(
             command_name,
             request_id,

@@ -0,0 +1,93 @@
+# Copyright 2024-present MongoDB, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from test import PyMongoTestCase
+
+import pytest
+
+from pymongo import InsertOne
+
+try:
+    from mockupdb import MockupDB, OpMsg, go, going
+
+    _HAVE_MOCKUPDB = True
+except ImportError:
+    _HAVE_MOCKUPDB = False
+
+
+from bson.objectid import ObjectId
+
+pytestmark = pytest.mark.mockupdb
+
+
+class TestIdOrdering(PyMongoTestCase):  # checks that inserts send _id as the first field of each document
+    def test_id_ordering(self):  # covers insert_one, Collection.bulk_write and MongoClient.bulk_write
+        server = MockupDB()
+        server.autoresponds(  # auto-reply to every "hello" command with the fields below
+            "hello",
+            isWritablePrimary=True,
+            msg="isdbgrid",
+            minWireVersion=0,
+            maxWireVersion=25,
+            helloOk=True,
+            serviceId=ObjectId(),
+        )
+        server.run()
+        self.addCleanup(server.stop)  # stop the mock server during test cleanup
+
+        # Besides the wire ordering, each case below checks that the caller's own dict gained an _id
+        document = {"x": 1}
+
+        client = self.simple_client(server.uri, loadBalanced=True)
+        collection = client.db.coll
+        with going(collection.insert_one, document):
+            request = server.receives()
+            self.assertEqual("_id", next(iter(request["documents"][0])))  # first key received is _id
+            request.reply({"ok": 1})
+        self.assertIn("_id", document)
+
+        document = {"x1": 1}
+
+        with going(collection.bulk_write, [InsertOne(document)]):
+            request = server.receives()
+            self.assertEqual("_id", next(iter(request["documents"][0])))
+            request.reply({"ok": 1})
+        self.assertIn("_id", document)
+
+        document = {"x2": 1}
+        with going(client.bulk_write, [InsertOne(namespace="db.coll", document=document)]):
+            request = server.receives()
+            self.assertEqual("_id", next(iter(request["ops"][0]["document"])))  # doc is nested under ops
+            request.reply({"ok": 1})
+        self.assertIn("_id", document)
+
+        # The spec does not require moving a user-supplied _id to the front; PyMongo does it for performance
+        with going(collection.insert_one, {"x": 1, "_id": 111}):
+            request = server.receives()
+            self.assertEqual("_id", next(iter(request["documents"][0])))
+            request.reply({"ok": 1})
+
+        with going(collection.bulk_write, [InsertOne({"x1": 1, "_id": 1111})]):
+            request = server.receives()
+            self.assertEqual("_id", next(iter(request["documents"][0])))
+            request.reply({"ok": 1})
+
+        with going(
+            client.bulk_write, [InsertOne(namespace="db.coll", document={"x2": 1, "_id": 11111})]
+        ):
+            request = server.receives()
+            self.assertEqual("_id", next(iter(request["ops"][0]["document"])))
+            request.reply({"ok": 1})