-
Notifications
You must be signed in to change notification settings - Fork 1.1k
PYTHON-4915 - Add guidance on adding _id fields to documents to CRUD spec, reorder client.bulk_write generated _id fields #1976
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
8906e84
425cd1b
1b3df52
36187bb
0e07e18
13568b1
da83afc
8894f23
b2dede3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
import copy | ||
import datetime | ||
import logging | ||
from collections import ChainMap | ||
from collections.abc import MutableMapping | ||
from itertools import islice | ||
from typing import ( | ||
|
@@ -132,8 +133,16 @@ def add_insert(self, namespace: str, document: _DocumentOut) -> None: | |
"""Add an insert document to the list of ops.""" | ||
validate_is_document_type("document", document) | ||
# Generate ObjectId client side. | ||
if not (isinstance(document, RawBSONDocument) or "_id" in document): | ||
document["_id"] = ObjectId() | ||
if not isinstance(document, RawBSONDocument): | ||
# Since the data document itself is nested within the insert document | ||
# it won't be automatically re-ordered by the BSON conversion. | ||
# We use ChainMap here to make the _id field the first field instead. | ||
if "_id" in document: | ||
document = ChainMap(document, {"_id": document["_id"]}) | ||
else: | ||
id = ObjectId() | ||
document["_id"] = id | ||
document = ChainMap(document, {"_id": id}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just realized this but what do you think about pushing this id-reordering logic down into # Encode current operation doc and, if newly added, namespace doc.
if real_op_type == "insert":
op_doc = ... # ChainMap stuff
op_doc_encoded = _dict_to_bson(op_doc, False, opts) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We'd still need to unwrap it, even with this change: # Started events
[{'bulkWrite': 1, 'errorsOnly': True, 'ordered': False, 'lsid': {'id': Binary(b'\xa3\xc7\x80\xdd\x07\x98L\x13\x81\xb8\xbcY\xe8\xa0\x04\xf3', 4)}, '$db': 'admin', 'ops': [{'insert': 0, 'document': ChainMap({'foo': 'bar', '_id': 5}, {'_id': 5})}, {'insert': 1, 'document': ChainMap({'foo': 'bar', '_id': 6}, {'_id': 6})}, {'insert': 0, 'document': ChainMap({'foo': 'bar', '_id': 5}, {'_id': 5})}, {'insert': 1, 'document': ChainMap({'foo': 'bar', '_id': 7}, {'_id': 7})}, {'delete': 0, 'filter': {'foo': 'bar', '_id': 5}, 'multi': False}], 'nsInfo': [{'ns': 'db.test_five'}, {'ns': 'db.test_six'}]}]
# Bulk write error
batch op errors occurred, full error: {'anySuccessful': True, 'error': None, 'writeErrors': [{'ok': 0.0, 'idx': 1, 'code': 11000, 'errmsg': 'E11000 duplicate key error collection: db.test_six index: _id_ dup key: { _id: 6 }', 'keyPattern': {'_id': 1}, 'keyValue': {'_id': 6}, 'n': 0, 'op': {'insert': 1, 'document': ChainMap({'foo': 'bar', '_id': 6}, {'_id': 6})}}, {'ok': 0.0, 'idx': 2, 'code': 11000, 'errmsg': 'E11000 duplicate key error collection: db.test_five index: _id_ dup key: { _id: 5 }', 'keyPattern': {'_id': 1}, 'keyValue': {'_id': 5}, 'n': 0, 'op': {'insert': 0, 'document': ChainMap({'foo': 'bar', '_id': 5}, {'_id': 5})}}, {'ok': 0.0, 'idx': 3, 'code': 11000, 'errmsg': 'E11000 duplicate key error collection: db.test_six index: _id_ dup key: { _id: 7 }', 'keyPattern': {'_id': 1}, 'keyValue': {'_id': 7}, 'n': 0, 'op': {'insert': 1, 'document': ChainMap({'foo': 'bar', '_id': 7}, {'_id': 7})}}], 'writeConcernErrors': [], 'nInserted': 1, 'nUpserted': 0, 'nMatched': 0, 'nModified': 0, 'nDeleted': 1, 'insertResults': {}, 'updateResults': {}, 'deleteResults': {}} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still like moving the ChainMap logic into |
||
cmd = {"insert": -1, "document": document} | ||
self.ops.append(("insert", cmd)) | ||
self.namespaces.append(namespace) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,7 +190,7 @@ def connection_checked_in(self, event): | |
|
||
import datetime | ||
from collections import abc, namedtuple | ||
from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence | ||
from typing import TYPE_CHECKING, Any, ChainMap, Mapping, Optional, Sequence | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
from bson.objectid import ObjectId | ||
from pymongo.hello import Hello, HelloCompat | ||
|
@@ -625,6 +625,11 @@ def __init__( | |
raise ValueError(f"{command!r} is not a valid command") | ||
# Command name must be first key. | ||
command_name = next(iter(command)) | ||
# Unpack ChainMaps into the original document only | ||
if command_name == "bulkWrite" and "ops" in command: | ||
for doc in command["ops"]: | ||
if "document" in doc and isinstance(doc["document"], ChainMap): | ||
doc["document"] = doc["document"].maps[0] | ||
super().__init__( | ||
command_name, | ||
request_id, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Copyright 2024-present MongoDB, Inc. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from __future__ import annotations | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's add the boilerplate License comment. |
||
|
||
from test import PyMongoTestCase | ||
|
||
import pytest | ||
|
||
from pymongo import InsertOne | ||
|
||
try: | ||
from mockupdb import MockupDB, OpMsg, go, going | ||
|
||
_HAVE_MOCKUPDB = True | ||
except ImportError: | ||
_HAVE_MOCKUPDB = False | ||
|
||
|
||
from bson.objectid import ObjectId | ||
|
||
pytestmark = pytest.mark.mockupdb | ||
|
||
|
||
class TestIdOrdering(PyMongoTestCase): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a link to the CRUD spec that describes this test? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Once the spec is merged, yes. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Was the spec merged? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, added! |
||
def test_id_ordering(self): | ||
server = MockupDB() | ||
server.autoresponds( | ||
"hello", | ||
isWritablePrimary=True, | ||
msg="isdbgrid", | ||
minWireVersion=0, | ||
maxWireVersion=25, | ||
helloOk=True, | ||
serviceId=ObjectId(), | ||
) | ||
server.run() | ||
self.addCleanup(server.stop) | ||
|
||
# We also verify that the original document contains an _id field after each insert | ||
document = {"x": 1} | ||
|
||
client = self.simple_client(server.uri, loadBalanced=True) | ||
collection = client.db.coll | ||
with going(collection.insert_one, document): | ||
request = server.receives() | ||
self.assertEqual("_id", next(iter(request["documents"][0]))) | ||
request.reply({"ok": 1}) | ||
self.assertIn("_id", document) | ||
|
||
document = {"x1": 1} | ||
|
||
with going(collection.bulk_write, [InsertOne(document)]): | ||
request = server.receives() | ||
self.assertEqual("_id", next(iter(request["documents"][0]))) | ||
request.reply({"ok": 1}) | ||
self.assertIn("_id", document) | ||
|
||
document = {"x2": 1} | ||
with going(client.bulk_write, [InsertOne(namespace="db.coll", document=document)]): | ||
request = server.receives() | ||
self.assertEqual("_id", next(iter(request["ops"][0]["document"]))) | ||
request.reply({"ok": 1}) | ||
self.assertIn("_id", document) | ||
|
||
# Re-ordering user-supplied _id fields is not required by the spec, but PyMongo does it for performance reasons | ||
with going(collection.insert_one, {"x": 1, "_id": 111}): | ||
request = server.receives() | ||
self.assertEqual("_id", next(iter(request["documents"][0]))) | ||
request.reply({"ok": 1}) | ||
|
||
with going(collection.bulk_write, [InsertOne({"x1": 1, "_id": 1111})]): | ||
request = server.receives() | ||
self.assertEqual("_id", next(iter(request["documents"][0]))) | ||
request.reply({"ok": 1}) | ||
|
||
with going( | ||
client.bulk_write, [InsertOne(namespace="db.coll", document={"x2": 1, "_id": 11111})] | ||
): | ||
request = server.receives() | ||
self.assertEqual("_id", next(iter(request["ops"][0]["document"]))) | ||
request.reply({"ok": 1}) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be `from collections import ChainMap` rather than importing `ChainMap` from `typing`.