From 296372ebfe7c644aeb57c2539914ea4b7ea22350 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 16 Sep 2025 14:39:10 -0400 Subject: [PATCH 1/6] PYTHON-5449 - Do not attach invalid document in exception message --- bson/__init__.py | 2 +- bson/_cbsonmodule.c | 15 ++++----------- bson/errors.py | 15 +++++++++++++++ test/test_bson.py | 15 +++++++++++---- 4 files changed, 31 insertions(+), 16 deletions(-) diff --git a/bson/__init__.py b/bson/__init__.py index b655e30c2c..6b2ba293a6 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -1009,7 +1009,7 @@ def _dict_to_bson( try: elements.append(_element_to_bson(key, value, check_keys, opts)) except InvalidDocument as err: - raise InvalidDocument(f"Invalid document {doc} | {err}") from err + raise InvalidDocument(f"Invalid document: {err}", doc) from err except AttributeError: raise TypeError(f"encoder expected a mapping type but got: {doc!r}") from None diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index be91e41734..5aa9553bc3 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -1645,7 +1645,7 @@ static int write_raw_doc(buffer_t buffer, PyObject* raw, PyObject* _raw_str) { } -/* Update Invalid Document error message to include doc. +/* Update Invalid Document error to include doc as a property. */ void handle_invalid_doc_error(PyObject* dict) { PyObject *etype = NULL, *evalue = NULL, *etrace = NULL; @@ -1659,20 +1659,13 @@ void handle_invalid_doc_error(PyObject* dict) { if (evalue && PyErr_GivenExceptionMatches(etype, InvalidDocument)) { PyObject *msg = PyObject_Str(evalue); if (msg) { - // Prepend doc to the existing message - PyObject *dict_str = PyObject_Str(dict); - if (dict_str == NULL) { - goto cleanup; - } - const char * dict_str_utf8 = PyUnicode_AsUTF8(dict_str); - if (dict_str_utf8 == NULL) { - goto cleanup; - } + // Add doc to the error class as a property. 
+ PyObject_SetAttrString(InvalidDocument, "document", dict); const char * msg_utf8 = PyUnicode_AsUTF8(msg); if (msg_utf8 == NULL) { goto cleanup; } - PyObject *new_msg = PyUnicode_FromFormat("Invalid document %s | %s", dict_str_utf8, msg_utf8); + PyObject *new_msg = PyUnicode_FromFormat("Invalid document: %s", msg_utf8); Py_DECREF(evalue); Py_DECREF(etype); etype = InvalidDocument; diff --git a/bson/errors.py b/bson/errors.py index a3699e704c..4ab85b4094 100644 --- a/bson/errors.py +++ b/bson/errors.py @@ -15,6 +15,8 @@ """Exceptions raised by the BSON package.""" from __future__ import annotations +from typing import Any, Optional + class BSONError(Exception): """Base class for all BSON exceptions.""" @@ -31,6 +33,19 @@ class InvalidStringData(BSONError): class InvalidDocument(BSONError): """Raised when trying to create a BSON object from an invalid document.""" + def __init__(self, message: str, document: Optional[Any] = None) -> None: + super().__init__(message) + self._document = document + + @property + def document(self) -> Any: + """The invalid document that caused the error.""" + return self._document + + @document.setter + def document(self, value: Any) -> None: + self._document = value + class InvalidId(BSONError): """Raised when trying to create an ObjectId from invalid data.""" diff --git a/test/test_bson.py b/test/test_bson.py index e4cf85c46c..f792db1e89 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -1163,7 +1163,7 @@ def __repr__(self): ): encode({"t": Wrapper(1)}) - def test_doc_in_invalid_document_error_message(self): + def test_doc_in_invalid_document_error_as_property(self): class Wrapper: def __init__(self, val): self.val = val @@ -1173,10 +1173,11 @@ def __repr__(self): self.assertEqual("1", repr(Wrapper(1))) doc = {"t": Wrapper(1)} - with self.assertRaisesRegex(InvalidDocument, f"Invalid document {doc}"): + with self.assertRaisesRegex(InvalidDocument, "Invalid document:") as cm: encode(doc) + self.assertEqual(cm.exception.document, 
doc) - def test_doc_in_invalid_document_error_message_mapping(self): + def test_doc_in_invalid_document_error_as_property_mapping(self): class MyMapping(abc.Mapping): def keys(self): return ["t"] @@ -1192,6 +1193,11 @@ def __len__(self): def __iter__(self): return iter(["t"]) + def __eq__(self, other): + if isinstance(other, MyMapping): + return True + return False + class Wrapper: def __init__(self, val): self.val = val @@ -1201,8 +1207,9 @@ def __repr__(self): self.assertEqual("1", repr(Wrapper(1))) doc = MyMapping() - with self.assertRaisesRegex(InvalidDocument, f"Invalid document {doc}"): + with self.assertRaisesRegex(InvalidDocument, "Invalid document:") as cm: encode(doc) + self.assertEqual(cm.exception.document, doc) class TestCodecOptions(unittest.TestCase): From 4fb872e86f442ef7247f4c1c749a69a647f13e82 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 16 Sep 2025 15:19:28 -0400 Subject: [PATCH 2/6] Address review --- bson/errors.py | 4 +++- doc/changelog.rst | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/bson/errors.py b/bson/errors.py index 4ab85b4094..2da00adcad 100644 --- a/bson/errors.py +++ b/bson/errors.py @@ -39,7 +39,9 @@ def __init__(self, message: str, document: Optional[Any] = None) -> None: @property def document(self) -> Any: - """The invalid document that caused the error.""" + """The invalid document that caused the error. + + ..versionadded:: 4.16""" return self._document @document.setter diff --git a/doc/changelog.rst b/doc/changelog.rst index 082c22fafc..7270043d41 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,6 +1,15 @@ Changelog ========= +Changes in Version 4.16.0 (XXXX/XX/XX) +-------------------------------------- + +PyMongo 4.16 brings a number of changes including: + +- Removed invalid documents from :class:`bson.errors.InvalidDocument` error messages as + including them may leak sensitive user data. + Instead, invalid documents are stored in :attr:`bson.errors.InvalidDocument.document`. 
+ Changes in Version 4.15.1 (2025/09/16) -------------------------------------- From ca9f09f11b3a12cdd8a83f01b95eaa0adab3231c Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Wed, 17 Sep 2025 14:36:52 -0400 Subject: [PATCH 3/6] Make document setter private --- bson/errors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bson/errors.py b/bson/errors.py index 2da00adcad..160d4e25b6 100644 --- a/bson/errors.py +++ b/bson/errors.py @@ -44,8 +44,7 @@ def document(self) -> Any: ..versionadded:: 4.16""" return self._document - @document.setter - def document(self, value: Any) -> None: + def _set_document(self, value: Any) -> None: self._document = value From 2319a43cdfd76d1f40b6c16f5e1e49102836653b Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 18 Sep 2025 10:13:11 -0400 Subject: [PATCH 4/6] Fix C bson code --- bson/_cbsonmodule.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 5aa9553bc3..667552201c 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -1649,7 +1649,7 @@ static int write_raw_doc(buffer_t buffer, PyObject* raw, PyObject* _raw_str) { */ void handle_invalid_doc_error(PyObject* dict) { PyObject *etype = NULL, *evalue = NULL, *etrace = NULL; - PyObject *msg = NULL, *dict_str = NULL, *new_msg = NULL; + PyObject *msg = NULL, *new_msg = NULL, *new_evalue = NULL; PyErr_Fetch(&etype, &evalue, &etrace); PyObject *InvalidDocument = _error("InvalidDocument"); if (InvalidDocument == NULL) { @@ -1659,19 +1659,19 @@ void handle_invalid_doc_error(PyObject* dict) { if (evalue && PyErr_GivenExceptionMatches(etype, InvalidDocument)) { PyObject *msg = PyObject_Str(evalue); if (msg) { - // Add doc to the error class as a property. 
- PyObject_SetAttrString(InvalidDocument, "document", dict); const char * msg_utf8 = PyUnicode_AsUTF8(msg); if (msg_utf8 == NULL) { goto cleanup; } PyObject *new_msg = PyUnicode_FromFormat("Invalid document: %s", msg_utf8); + // Add doc to the error instance as a property. + PyObject *new_evalue = PyObject_CallFunctionObjArgs(InvalidDocument, new_msg, dict, NULL); Py_DECREF(evalue); Py_DECREF(etype); etype = InvalidDocument; InvalidDocument = NULL; - if (new_msg) { - evalue = new_msg; + if (new_evalue) { + evalue = new_evalue; } else { evalue = msg; } @@ -1682,7 +1682,7 @@ void handle_invalid_doc_error(PyObject* dict) { PyErr_Restore(etype, evalue, etrace); Py_XDECREF(msg); Py_XDECREF(InvalidDocument); - Py_XDECREF(dict_str); + Py_XDECREF(new_evalue); Py_XDECREF(new_msg); } From 6c73d1e19e0da73107d12e60429fed715958e012 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 18 Sep 2025 15:00:59 -0400 Subject: [PATCH 5/6] Check for NULL --- bson/_cbsonmodule.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 667552201c..bee7198567 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -1664,6 +1664,9 @@ void handle_invalid_doc_error(PyObject* dict) { goto cleanup; } PyObject *new_msg = PyUnicode_FromFormat("Invalid document: %s", msg_utf8); + if (new_msg == NULL) { + goto cleanup; + } // Add doc to the error instance as a property. 
PyObject *new_evalue = PyObject_CallFunctionObjArgs(InvalidDocument, new_msg, dict, NULL); Py_DECREF(evalue); From 1d28fdccf41e66ac6154063560c0a886ef04e127 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 23 Sep 2025 13:49:49 -0400 Subject: [PATCH 6/6] Remove _set_document --- bson/errors.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/bson/errors.py b/bson/errors.py index 160d4e25b6..ffc117f7ac 100644 --- a/bson/errors.py +++ b/bson/errors.py @@ -44,9 +44,6 @@ def document(self) -> Any: ..versionadded:: 4.16""" return self._document - def _set_document(self, value: Any) -> None: - self._document = value - class InvalidId(BSONError): """Raised when trying to create an ObjectId from invalid data."""