diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index bf0bc53b634022..20c617f8108d5f 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1360,6 +1360,43 @@ def test_unicode_error_str_does_not_crash(self): exc = UnicodeDecodeError('utf-8', encoded, start, end, '') self.assertIsInstance(str(exc), str) + def test_unicode_error_evil_str_set_none_object(self): + def side_effect(exc): + exc.object = None + self.do_test_unicode_error_mutate(side_effect) + + def test_unicode_error_evil_str_del_self_object(self): + def side_effect(exc): + del exc.object + self.do_test_unicode_error_mutate(side_effect) + + def do_test_unicode_error_mutate(self, side_effect): + # Test that str(UnicodeError(...)) does not crash when + # side-effects mutate the underlying 'object' attribute. + # See https://github.com/python/cpython/issues/128974. + + class Evil(str): + def __str__(self): + side_effect(exc) + return self + + for reason, encoding in [ + ("reason", Evil("utf-8")), + (Evil("reason"), "utf-8"), + (Evil("reason"), Evil("utf-8")), + ]: + with self.subTest(encoding=encoding, reason=reason): + with self.subTest(UnicodeEncodeError): + exc = UnicodeEncodeError(encoding, "x", 0, 1, reason) + self.assertRaises(TypeError, str, exc) + with self.subTest(UnicodeDecodeError): + exc = UnicodeDecodeError(encoding, b"x", 0, 1, reason) + self.assertRaises(TypeError, str, exc) + + with self.subTest(UnicodeTranslateError): + exc = UnicodeTranslateError("x", 0, 1, Evil("reason")) + self.assertRaises(TypeError, str, exc) + @no_tracing def test_badisinstance(self): # Bug #2542: if issubclass(e, MyException) raises an exception, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst new file mode 100644 index 00000000000000..fc4453ae3f2644 --- /dev/null +++ 
b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst @@ -0,0 +1,3 @@ +Fix a crash in :meth:`UnicodeError.__str__ <object.__str__>` when custom +attributes implement :meth:`~object.__str__` with side-effects. +Patch by Bénédikt Tran. diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 28c7fdbd47ba8d..e30fea0f37a925 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2784,6 +2784,8 @@ SyntaxError_str(PyObject *op) if (!filename && !have_lineno) return PyObject_Str(self->msg ? self->msg : Py_None); + // Even if 'filename' can be an instance of a subclass of 'str', + // we only render its "true" content and do not use str(filename). if (filename && have_lineno) result = PyUnicode_FromFormat("%S (%U, line %ld)", self->msg ? self->msg : Py_None, @@ -2903,29 +2905,47 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, /* * Check the validity of 'attr' as a unicode or bytes object depending - * on 'as_bytes' and return a new reference on it if it is the case. + * on 'as_bytes'. * * The 'name' is the attribute name and is only used for error reporting. * - * On success, this returns a strong reference on 'attr'. - * On failure, this sets a TypeError and returns NULL. + * On success, this returns 0. + * On failure, this sets a TypeError and returns -1. */ -static PyObject * -as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) +static int +check_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) { assert(as_bytes == 0 || as_bytes == 1); if (attr == NULL) { - PyErr_Format(PyExc_TypeError, "%s attribute not set", name); - return NULL; + PyErr_Format(PyExc_TypeError, + "UnicodeError '%s' attribute is not set", + name); + return -1; } if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) { PyErr_Format(PyExc_TypeError, - "%s attribute must be %s", - name, - as_bytes ? "bytes" : "unicode"); - return NULL; + "UnicodeError '%s' attribute must be a %s", + name, as_bytes ? 
"bytes" : "string"); + return -1; } - return Py_NewRef(attr); + return 0; +} + + +/* + * Check the validity of 'attr' as a unicode or bytes object depending + * on 'as_bytes' and return a new reference on it if it is the case. + * + * The 'name' is the attribute name and is only used for error reporting. + * + * On success, this returns a strong reference on 'attr'. + * On failure, this sets a TypeError and returns NULL. + */ +static PyObject * +as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) +{ + int rc = check_unicode_error_attribute(attr, name, as_bytes); + return rc < 0 ? NULL : Py_NewRef(attr); } @@ -3591,7 +3611,10 @@ UnicodeEncodeError_str(PyObject *self) if (encoding_str == NULL) { goto done; } - + // calls to PyObject_Str(...) above might mutate 'exc->object' + if (check_unicode_error_attribute(exc->object, "object", false) < 0) { + goto done; + } Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end; @@ -3711,7 +3734,10 @@ UnicodeDecodeError_str(PyObject *self) if (encoding_str == NULL) { goto done; } - + // calls to PyObject_Str(...) above might mutate 'exc->object' + if (check_unicode_error_attribute(exc->object, "object", true) < 0) { + goto done; + } Py_ssize_t len = PyBytes_GET_SIZE(exc->object); Py_ssize_t start = exc->start, end = exc->end; @@ -3807,7 +3833,10 @@ UnicodeTranslateError_str(PyObject *self) if (reason_str == NULL) { goto done; } - + // call to PyObject_Str(...) above might mutate 'exc->object' + if (check_unicode_error_attribute(exc->object, "object", false) < 0) { + goto done; + } Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end;