Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bson/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ def _dict_to_bson(
try:
elements.append(_element_to_bson(key, value, check_keys, opts))
except InvalidDocument as err:
raise InvalidDocument(f"Invalid document {doc} | {err}") from err
raise InvalidDocument(f"Invalid document: {err}", doc) from err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this InvalidDocument except clause include the encoding of _id too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be a pre-existing bug, no? Happy to fix if we identify why the code currently looks like this. Maybe we assume that _id will be valid if it reaches this far?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct it would be a pre-existing bug. Could you open a ticket for it?

>>>> encode({'_id': set()})
Traceback (most recent call last):
  File "<python-input-6>", line 1, in <module>
    encode({'_id': set()})
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1053, in encode
    return _dict_to_bson(document, check_keys, codec_options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1006, in _dict_to_bson
    elements.append(_name_value_to_bson(b"_id\x00", doc["_id"], check_keys, opts))
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 980, in _name_value_to_bson
    raise InvalidDocument(f"cannot encode object: {value!r}, of type: {type(value)!r}")
bson.errors.InvalidDocument: cannot encode object: set(), of type: <class 'set'>

vs the expected behavior:

>>>> encode({'_id': 1, 'foo': set()})
Traceback (most recent call last):
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1010, in _dict_to_bson
    elements.append(_element_to_bson(key, value, check_keys, opts))
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 994, in _element_to_bson
    return _name_value_to_bson(name, value, check_keys, opts)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 980, in _name_value_to_bson
    raise InvalidDocument(f"cannot encode object: {value!r}, of type: {type(value)!r}")
bson.errors.InvalidDocument: cannot encode object: set(), of type: <class 'set'>

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "<python-input-7>", line 1, in <module>
    encode({'_id': 1, 'foo': set()})
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1053, in encode
    return _dict_to_bson(document, check_keys, codec_options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1012, in _dict_to_bson
    raise InvalidDocument(f"Invalid document {doc} | {err}") from err
bson.errors.InvalidDocument: Invalid document {'_id': 1, 'foo': set()} | cannot encode object: set(), of type: <class 'set'>

Copy link
Contributor Author

@NoahStapp NoahStapp Sep 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened PYTHON-5560 to track.

except AttributeError:
raise TypeError(f"encoder expected a mapping type but got: {doc!r}") from None

Expand Down
26 changes: 11 additions & 15 deletions bson/_cbsonmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1645,11 +1645,11 @@ static int write_raw_doc(buffer_t buffer, PyObject* raw, PyObject* _raw_str) {
}


/* Update Invalid Document error message to include doc.
/* Update Invalid Document error to include doc as a property.
*/
void handle_invalid_doc_error(PyObject* dict) {
PyObject *etype = NULL, *evalue = NULL, *etrace = NULL;
PyObject *msg = NULL, *dict_str = NULL, *new_msg = NULL;
PyObject *msg = NULL, *new_msg = NULL, *new_evalue = NULL;
PyErr_Fetch(&etype, &evalue, &etrace);
PyObject *InvalidDocument = _error("InvalidDocument");
if (InvalidDocument == NULL) {
Expand All @@ -1659,26 +1659,22 @@ void handle_invalid_doc_error(PyObject* dict) {
if (evalue && PyErr_GivenExceptionMatches(etype, InvalidDocument)) {
PyObject *msg = PyObject_Str(evalue);
if (msg) {
// Prepend doc to the existing message
PyObject *dict_str = PyObject_Str(dict);
if (dict_str == NULL) {
goto cleanup;
}
const char * dict_str_utf8 = PyUnicode_AsUTF8(dict_str);
if (dict_str_utf8 == NULL) {
goto cleanup;
}
const char * msg_utf8 = PyUnicode_AsUTF8(msg);
if (msg_utf8 == NULL) {
goto cleanup;
}
PyObject *new_msg = PyUnicode_FromFormat("Invalid document %s | %s", dict_str_utf8, msg_utf8);
PyObject *new_msg = PyUnicode_FromFormat("Invalid document: %s", msg_utf8);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For an invalid field in a nested doc, does this yield errors like "Invalid document: Invalid document: Invalid document: Invalid document: Invalid document: ...."?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it works as expected:

encode({"t": {"f": Wrapper(1)}})

# Produces
Invalid document: cannot encode object: 1, of type: <class 'test.test_bson.TestBSON.test_doc_in_invalid_document_error_as_property.<locals>.Wrapper'>

if (new_msg == NULL) {
goto cleanup;
}
// Add doc to the error instance as a property.
PyObject *new_evalue = PyObject_CallFunctionObjArgs(InvalidDocument, new_msg, dict, NULL);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to check the new_msg == NULL case before we call PyObject_CallFunctionObjArgs here.

Py_DECREF(evalue);
Py_DECREF(etype);
etype = InvalidDocument;
InvalidDocument = NULL;
if (new_msg) {
evalue = new_msg;
if (new_evalue) {
evalue = new_evalue;
} else {
evalue = msg;
}
Expand All @@ -1689,7 +1685,7 @@ void handle_invalid_doc_error(PyObject* dict) {
PyErr_Restore(etype, evalue, etrace);
Py_XDECREF(msg);
Py_XDECREF(InvalidDocument);
Py_XDECREF(dict_str);
Py_XDECREF(new_evalue);
Py_XDECREF(new_msg);
}

Expand Down
16 changes: 16 additions & 0 deletions bson/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"""Exceptions raised by the BSON package."""
from __future__ import annotations

from typing import Any, Optional


class BSONError(Exception):
"""Base class for all BSON exceptions."""
Expand All @@ -31,6 +33,20 @@ class InvalidStringData(BSONError):
class InvalidDocument(BSONError):
"""Raised when trying to create a BSON object from an invalid document."""

def __init__(self, message: str, document: Optional[Any] = None) -> None:
super().__init__(message)
self._document = document

@property
def document(self) -> Any:
"""The invalid document that caused the error.

..versionadded:: 4.16"""
return self._document

def _set_document(self, value: Any) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_set_document can be removed.

self._document = value


class InvalidId(BSONError):
"""Raised when trying to create an ObjectId from invalid data."""
9 changes: 9 additions & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
Changelog
=========

Changes in Version 4.16.0 (XXXX/XX/XX)
--------------------------------------

PyMongo 4.16 brings a number of changes including:

- Removed invalid documents from :class:`bson.errors.InvalidDocument` error messages as
doing so may leak sensitive user data.
Instead, invalid documents are stored in :attr:`bson.errors.InvalidDocument.document`.

Changes in Version 4.15.1 (2025/09/16)
--------------------------------------

Expand Down
15 changes: 11 additions & 4 deletions test/test_bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,7 +1163,7 @@ def __repr__(self):
):
encode({"t": Wrapper(1)})

def test_doc_in_invalid_document_error_message(self):
def test_doc_in_invalid_document_error_as_property(self):
class Wrapper:
def __init__(self, val):
self.val = val
Expand All @@ -1173,10 +1173,11 @@ def __repr__(self):

self.assertEqual("1", repr(Wrapper(1)))
doc = {"t": Wrapper(1)}
with self.assertRaisesRegex(InvalidDocument, f"Invalid document {doc}"):
with self.assertRaisesRegex(InvalidDocument, "Invalid document:") as cm:
encode(doc)
self.assertEqual(cm.exception.document, doc)

def test_doc_in_invalid_document_error_message_mapping(self):
def test_doc_in_invalid_document_error_as_property_mapping(self):
class MyMapping(abc.Mapping):
def keys(self):
return ["t"]
Expand All @@ -1192,6 +1193,11 @@ def __len__(self):
def __iter__(self):
return iter(["t"])

def __eq__(self, other):
if isinstance(other, MyMapping):
return True
return False

class Wrapper:
def __init__(self, val):
self.val = val
Expand All @@ -1201,8 +1207,9 @@ def __repr__(self):

self.assertEqual("1", repr(Wrapper(1)))
doc = MyMapping()
with self.assertRaisesRegex(InvalidDocument, f"Invalid document {doc}"):
with self.assertRaisesRegex(InvalidDocument, "Invalid document:") as cm:
encode(doc)
self.assertEqual(cm.exception.document, doc)


class TestCodecOptions(unittest.TestCase):
Expand Down
Loading