From 266caf02c4edb295eeafd1d7fcba0c8a52d2e611 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 23 Sep 2025 14:31:35 -0400 Subject: [PATCH 1/5] PYTHON-5449 - Do not attach invalid document in exception message (#2539) --- bson/__init__.py | 2 +- bson/_cbsonmodule.c | 26 +++++++++++--------------- bson/errors.py | 13 +++++++++++++ doc/changelog.rst | 9 +++++++++ test/test_bson.py | 15 +++++++++++---- 5 files changed, 45 insertions(+), 20 deletions(-) diff --git a/bson/__init__.py b/bson/__init__.py index e677d8cfdd..d260fb876f 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -1009,7 +1009,7 @@ def _dict_to_bson( try: elements.append(_element_to_bson(key, value, check_keys, opts)) except InvalidDocument as err: - raise InvalidDocument(f"Invalid document {doc} | {err}") from err + raise InvalidDocument(f"Invalid document: {err}", doc) from err except AttributeError: raise TypeError(f"encoder expected a mapping type but got: {doc!r}") from None diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index be91e41734..bee7198567 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -1645,11 +1645,11 @@ static int write_raw_doc(buffer_t buffer, PyObject* raw, PyObject* _raw_str) { } -/* Update Invalid Document error message to include doc. +/* Update Invalid Document error to include doc as a property. */ void handle_invalid_doc_error(PyObject* dict) { PyObject *etype = NULL, *evalue = NULL, *etrace = NULL; - PyObject *msg = NULL, *dict_str = NULL, *new_msg = NULL; + PyObject *msg = NULL, *new_msg = NULL, *new_evalue = NULL; PyErr_Fetch(&etype, &evalue, &etrace); PyObject *InvalidDocument = _error("InvalidDocument"); if (InvalidDocument == NULL) { @@ -1659,26 +1659,22 @@ void handle_invalid_doc_error(PyObject* dict) { if (evalue && PyErr_GivenExceptionMatches(etype, InvalidDocument)) { PyObject *msg = PyObject_Str(evalue); if (msg) { - // Prepend doc to the existing message - PyObject *dict_str = PyObject_Str(dict); - if (dict_str == NULL) { - goto cleanup; - } - const char * dict_str_utf8 = PyUnicode_AsUTF8(dict_str); - if (dict_str_utf8 == NULL) { - goto cleanup; - } const char * msg_utf8 = PyUnicode_AsUTF8(msg); if (msg_utf8 == NULL) { goto cleanup; } - PyObject *new_msg = PyUnicode_FromFormat("Invalid document %s | %s", dict_str_utf8, msg_utf8); + PyObject *new_msg = PyUnicode_FromFormat("Invalid document: %s", msg_utf8); + if (new_msg == NULL) { + goto cleanup; + } + // Add doc to the error instance as a property. + PyObject *new_evalue = PyObject_CallFunctionObjArgs(InvalidDocument, new_msg, dict, NULL); Py_DECREF(evalue); Py_DECREF(etype); etype = InvalidDocument; InvalidDocument = NULL; - if (new_msg) { - evalue = new_msg; + if (new_evalue) { + evalue = new_evalue; } else { evalue = msg; } @@ -1689,7 +1685,7 @@ void handle_invalid_doc_error(PyObject* dict) { PyErr_Restore(etype, evalue, etrace); Py_XDECREF(msg); Py_XDECREF(InvalidDocument); - Py_XDECREF(dict_str); + Py_XDECREF(new_evalue); Py_XDECREF(new_msg); } diff --git a/bson/errors.py b/bson/errors.py index a3699e704c..ffc117f7ac 100644 --- a/bson/errors.py +++ b/bson/errors.py @@ -15,6 +15,8 @@ """Exceptions raised by the BSON package.""" from __future__ import annotations +from typing import Any, Optional + class BSONError(Exception): """Base class for all BSON exceptions.""" @@ -31,6 +33,17 @@ class InvalidStringData(BSONError): class InvalidDocument(BSONError): """Raised when trying to create a BSON object from an invalid document.""" + def __init__(self, message: str, document: Optional[Any] = None) -> None: + super().__init__(message) + self._document = document + + @property + def document(self) -> Any: + """The invalid document that caused the error. + + ..versionadded:: 4.16""" + return self._document + class InvalidId(BSONError): """Raised when trying to create an ObjectId from invalid data.""" diff --git a/doc/changelog.rst b/doc/changelog.rst index 082c22fafc..7270043d41 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,6 +1,15 @@ Changelog ========= +Changes in Version 4.16.0 (XXXX/XX/XX) +-------------------------------------- + +PyMongo 4.16 brings a number of changes including: + +- Removed invalid documents from :class:`bson.errors.InvalidDocument` error messages as + doing so may leak sensitive user data. + Instead, invalid documents are stored in :attr:`bson.errors.InvalidDocument.document`. + Changes in Version 4.15.1 (2025/09/16) -------------------------------------- diff --git a/test/test_bson.py b/test/test_bson.py index e4cf85c46c..f792db1e89 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -1163,7 +1163,7 @@ def __repr__(self): ): encode({"t": Wrapper(1)}) - def test_doc_in_invalid_document_error_message(self): + def test_doc_in_invalid_document_error_as_property(self): class Wrapper: def __init__(self, val): self.val = val @@ -1173,10 +1173,11 @@ def __repr__(self): self.assertEqual("1", repr(Wrapper(1))) doc = {"t": Wrapper(1)} - with self.assertRaisesRegex(InvalidDocument, f"Invalid document {doc}"): + with self.assertRaisesRegex(InvalidDocument, "Invalid document:") as cm: encode(doc) + self.assertEqual(cm.exception.document, doc) - def test_doc_in_invalid_document_error_message_mapping(self): + def test_doc_in_invalid_document_error_as_property_mapping(self): class MyMapping(abc.Mapping): def keys(self): return ["t"] @@ -1192,6 +1193,11 @@ def __len__(self): def __iter__(self): return iter(["t"]) + def __eq__(self, other): + if isinstance(other, MyMapping): + return True + return False + class Wrapper: def __init__(self, val): self.val = val @@ -1201,8 +1207,9 @@ def __repr__(self): self.assertEqual("1", repr(Wrapper(1))) doc = MyMapping() - with self.assertRaisesRegex(InvalidDocument, f"Invalid document {doc}"): + with self.assertRaisesRegex(InvalidDocument, "Invalid document:") as cm: encode(doc) + self.assertEqual(cm.exception.document, doc) class TestCodecOptions(unittest.TestCase): From 29c4c2cc0ff772ac3a5ef9907f4974b2cef50eca Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 23 Sep 2025 14:08:13 -0500 Subject: [PATCH 2/5] PYTHON-5570 Do not freeze the lockfile (#2555) --- .evergreen/scripts/setup-dev-env.sh | 4 ++-- .pre-commit-config.yaml | 17 +++++++++++------ justfile | 4 +--- uv.lock | 2 +- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/.evergreen/scripts/setup-dev-env.sh b/.evergreen/scripts/setup-dev-env.sh index 6e6b5965bd..38824f32b7 100755 --- a/.evergreen/scripts/setup-dev-env.sh +++ b/.evergreen/scripts/setup-dev-env.sh @@ -47,13 +47,13 @@ if [ -f $HOME/.visualStudioEnv.sh ]; then SSH_TTY=1 source $HOME/.visualStudioEnv.sh set -u fi -uv sync --frozen +uv sync echo "Setting up python environment... done." # Ensure there is a pre-commit hook if there is a git checkout. if [ -d .git ] && [ ! -f .git/hooks/pre-commit ]; then - uv run --frozen pre-commit install + uv run pre-commit install fi popd > /dev/null diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ac129f95f0..d2b9d9a17a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -105,12 +105,6 @@ repos: # - test/test_client.py:188: te ==> the, be, we, to args: ["-L", "fle,fo,infinit,isnt,nin,te,aks"] -- repo: https://github.com/astral-sh/uv-pre-commit - # uv version. - rev: 0.8.17 - hooks: - - id: uv-lock - - repo: local hooks: - id: executable-shell @@ -128,3 +122,14 @@ repos: language: python require_serial: true additional_dependencies: ["shrub.py>=3.10.0", "pyyaml>=6.0.2"] + + - id: uv-lock + name: uv-lock + entry: uv lock + language: python + require_serial: true + files: ^(uv\.lock|pyproject\.toml|requirements.txt|requirements/.*\.txt)$ + pass_filenames: false + fail_fast: true + additional_dependencies: + - "uv>=0.8.4" diff --git a/justfile b/justfile index c129b2c199..4645a4a47d 100644 --- a/justfile +++ b/justfile @@ -1,7 +1,5 @@ # See https://just.systems/man/en/ for instructions set shell := ["bash", "-c"] -# Do not modify the lock file when running justfile commands. -export UV_FROZEN := "1" # Commonly used command segments. typing_run := "uv run --group typing --extra aws --extra encryption --extra ocsp --extra snappy --extra test --extra zstd" @@ -16,7 +14,7 @@ default: [private] resync: - @uv sync --quiet --frozen + @uv sync --quiet install: bash .evergreen/scripts/setup-dev-env.sh diff --git a/uv.lock b/uv.lock index ce0367e872..5a7279e1ef 100644 --- a/uv.lock +++ b/uv.lock @@ -1309,7 +1309,7 @@ requires-dist = [ { name = "furo", marker = "extra == 'docs'", specifier = "==2025.7.19" }, { name = "importlib-metadata", marker = "python_full_version < '3.13' and extra == 'test'", specifier = ">=7.0" }, { name = "pykerberos", marker = "os_name != 'nt' and extra == 'gssapi'" }, - { name = "pymongo-auth-aws", marker = "extra == 'aws'", specifier = ">=1.1.0,<2.0.0" }, + { name = "pymongo-auth-aws", marker = "extra == 'aws'", specifier = ">=1.1.1,<2.0.0" }, { name = "pymongo-auth-aws", marker = "extra == 'encryption'", specifier = ">=1.1.0,<2.0.0" }, { name = "pymongocrypt", marker = "extra == 'encryption'", specifier = ">=1.13.0,<2.0.0" }, { name = "pyopenssl", marker = "extra == 'ocsp'", specifier = ">=17.2.0" }, From 99ea7398c8021ab06f978f7e321e6d62fe429923 Mon Sep 17 00:00:00 2001 From: Iris Ho Date: Tue, 23 Sep 2025 16:09:23 -0700 Subject: [PATCH 3/5] create pull request template (not sure why uv lock changed) --- .github/PULL_REQUEST_TEMPLATE.md | 32 ++++++++++++++++++++++++++++++++ uv.lock | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000000..ae9764dc5d --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,32 @@ + +## Summary +_What conceptually is this PR introducing? If context is already provided from the JIRA ticket, still place it in the +Pull Request as you should not make the reviewer do digging for a basic summary._ + +## Changes in this PR +_What changes did you make to the code? What new APIs (public or private) were added, removed, or edited to generate +the desired outcome explained in the above summary?_ + +## Test Plan +_How did you test the code? If you added unit tests, you can say that. If you didn’t introduce unit tests, explain why. +All code should be tested in some way – so please list what your validation strategy was._ + +### Screenshots (optional) +_Usually a great supplement to a test plan, especially if this requires local testing._ + +## Checklist +### Checklist for Author +- [ ] Did you update the changelog (if necessary)? +- [ ] Is the intention of the code captured in relevant tests? +- [ ] If there are new TODOs, has a related JIRA ticket been created? + +### Checklist for Reviewer {@primary_reviewer} +- [ ] Does the title of the PR reference a JIRA Ticket? +- [ ] Do you fully understand the implementation? (Would you be comfortable explaining how this code works to someone else?) +- [ ] Have you checked for spelling & grammar errors? +- [ ] Is all relevant documentation (README or docstring) updated? + +## Focus Areas for Reviewer +_List any complex portion of code you believe needs additional scrutiny and explain why._ + + diff --git a/uv.lock b/uv.lock index 5a7279e1ef..ce0367e872 100644 --- a/uv.lock +++ b/uv.lock @@ -1309,7 +1309,7 @@ requires-dist = [ { name = "furo", marker = "extra == 'docs'", specifier = "==2025.7.19" }, { name = "importlib-metadata", marker = "python_full_version < '3.13' and extra == 'test'", specifier = ">=7.0" }, { name = "pykerberos", marker = "os_name != 'nt' and extra == 'gssapi'" }, - { name = "pymongo-auth-aws", marker = "extra == 'aws'", specifier = ">=1.1.1,<2.0.0" }, + { name = "pymongo-auth-aws", marker = "extra == 'aws'", specifier = ">=1.1.0,<2.0.0" }, { name = "pymongo-auth-aws", marker = "extra == 'encryption'", specifier = ">=1.1.0,<2.0.0" }, { name = "pymongocrypt", marker = "extra == 'encryption'", specifier = ">=1.13.0,<2.0.0" }, { name = "pyopenssl", marker = "extra == 'ocsp'", specifier = ">=17.2.0" }, From eb65cc0f9d10b3fb66100c47909f09d86701909e Mon Sep 17 00:00:00 2001 From: Iris Ho Date: Mon, 29 Sep 2025 10:49:10 -0700 Subject: [PATCH 4/5] change section descriptions to be comments to the PR author --- .github/PULL_REQUEST_TEMPLATE.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ae9764dc5d..7cf2e482b0 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,20 +1,21 @@ ## Summary -_What conceptually is this PR introducing? If context is already provided from the JIRA ticket, still place it in the -Pull Request as you should not make the reviewer do digging for a basic summary._ + ## Changes in this PR -_What changes did you make to the code? What new APIs (public or private) were added, removed, or edited to generate -the desired outcome explained in the above summary?_ + ## Test Plan -_How did you test the code? If you added unit tests, you can say that. If you didn’t introduce unit tests, explain why. -All code should be tested in some way – so please list what your validation strategy was._ + ### Screenshots (optional) -_Usually a great supplement to a test plan, especially if this requires local testing._ + ## Checklist + ### Checklist for Author - [ ] Did you update the changelog (if necessary)? - [ ] Is the intention of the code captured in relevant tests? @@ -27,6 +28,6 @@ _Usually a great supplement to a test plan, especially if this requires local te - [ ] Is all relevant documentation (README or docstring) updated? ## Focus Areas for Reviewer -_List any complex portion of code you believe needs additional scrutiny and explain why._ + From f9b42155e0bb1113f869d359c3ca9dfd935eae96 Mon Sep 17 00:00:00 2001 From: Iris Ho Date: Tue, 30 Sep 2025 11:46:31 -0700 Subject: [PATCH 5/5] rename to lowercase? --- .github/{PULL_REQUEST_TEMPLATE.md => pull_request_template.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{PULL_REQUEST_TEMPLATE.md => pull_request_template.md} (100%) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/pull_request_template.md similarity index 100% rename from .github/PULL_REQUEST_TEMPLATE.md rename to .github/pull_request_template.md