diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 65d54d1004d647..e0364b098d5312 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -1290,6 +1290,39 @@ def custom_handler(exc):
         self.assertTrue(_codecs_unregister_error(custom_name))
         self.assertRaises(LookupError, codecs.lookup_error, custom_name)
 
+    def test_unicode_error_args(self):
+        # gh-58038: while an error handler runs, exc.args must mirror the
+        # encoding, object, start, end and reason attributes.
+        def check_args(exc):
+            self.assertEqual(exc.args, (exc.encoding, exc.object,
+                                        exc.start, exc.end, exc.reason))
+
+        def handler_decode(exc):
+            check_args(exc)
+            return '?', exc.end
+
+        def handler_encode(exc):
+            check_args(exc)
+            return b'?', exc.end
+
+        codecs.register_error('test_args', handler_decode)
+        try:
+            result = b'\x80\xd0'.decode('utf-8', 'test_args')
+        finally:
+            # Unregister even if an assertion failed inside the handler.
+            unregistered = _codecs_unregister_error('test_args')
+        self.assertTrue(unregistered)
+        self.assertEqual(result, '??')
+
+        codecs.register_error('test_args', handler_encode)
+        try:
+            result = '\u1111 \u2222'.encode('ascii', 'test_args')
+        finally:
+            # Unregister even if an assertion failed inside the handler.
+            unregistered = _codecs_unregister_error('test_args')
+        self.assertTrue(unregistered)
+        self.assertEqual(result, b'? ?')
+
     def test_unregister_custom_unknown_error_handler(self):
         unknown_name = 'test.test_unregister_custom_unknown_error_handler'
         self.assertRaises(LookupError, codecs.lookup_error, unknown_name)
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 59f77f91d85e5c..386a14a98f81d1 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -502,7 +502,7 @@ def testAttributes(self):
                  'start' : 0, 'reason' : 'ordinal not in range'}),
             (UnicodeDecodeError, ('ascii', bytearray(b'\xff'), 0, 1,
                                   'ordinal not in range'), {},
-                {'args' : ('ascii', bytearray(b'\xff'), 0, 1,
+                {'args' : ('ascii', b'\xff', 0, 1,
                            'ordinal not in range'),
                  'encoding' : 'ascii', 'object' : b'\xff',
                  'start' : 0, 'reason' : 'ordinal not in range'}),
diff --git a/Misc/NEWS.d/next/Library/2025-09-23-20-05-40.gh-issue-58038.0jJA9j.rst b/Misc/NEWS.d/next/Library/2025-09-23-20-05-40.gh-issue-58038.0jJA9j.rst
new file mode 100644
index 00000000000000..f4fd2ff54408a0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-09-23-20-05-40.gh-issue-58038.0jJA9j.rst
@@ -0,0 +1,3 @@
+:exc:`UnicodeDecodeError` and :exc:`UnicodeEncodeError` now build their
+``args`` tuple from the current ``encoding``, ``object``, ``start``, ``end``
+and ``reason`` attributes, so ``args`` stays in sync when they change.
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 531ee48eaf8a24..f81d5c49b04a67 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -3223,6 +3223,33 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
 #define PyUnicodeErrorObject_CAST(op) \
     (assert(PyUnicodeError_Check(op)), ((PyUnicodeErrorObject *)(op)))
 
+/*
+ * Getter for the 'args' attribute of UnicodeEncodeError and
+ * UnicodeDecodeError.
+ *
+ * Build a new (encoding, object, start, end, reason) tuple from the current
+ * fields of the exception; a field that is still NULL is reported as None.
+ * Return NULL with an exception set if the tuple cannot be created.
+ */
+static PyObject *
+UnicodeError_args_get(PyObject *op, void *context)
+{
+    PyUnicodeErrorObject *self = PyUnicodeErrorObject_CAST(op);
+    return Py_BuildValue("(OOnnO)",
+                         self->encoding ? self->encoding : Py_None,
+                         self->object ? self->object : Py_None,
+                         self->start,
+                         self->end,
+                         self->reason ? self->reason : Py_None);
+}
+
+static PyGetSetDef UnicodeError_getset[] = {
+    /* NOTE(review): no setter is provided, so assigning to 'args' on these
+     * two exception types presumably fails now -- confirm this is intended. */
+    {"args", UnicodeError_args_get, NULL, NULL},
+    {NULL}
+};
+
 /* Assert some properties of the adjusted 'end' value. */
 #ifndef NDEBUG
 static void
@@ -3734,7 +3761,7 @@ static PyTypeObject _PyExc_UnicodeEncodeError = {
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
     PyDoc_STR("Unicode encoding error."), UnicodeError_traverse,
     UnicodeError_clear, 0, 0, 0, 0, 0, UnicodeError_members,
-    0, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict),
+    UnicodeError_getset, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict),
     UnicodeEncodeError_init, 0, BaseException_new,
 };
 PyObject *PyExc_UnicodeEncodeError = (PyObject *)&_PyExc_UnicodeEncodeError;
@@ -3847,7 +3874,7 @@ static PyTypeObject _PyExc_UnicodeDecodeError = {
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
     PyDoc_STR("Unicode decoding error."), UnicodeError_traverse,
     UnicodeError_clear, 0, 0, 0, 0, 0, UnicodeError_members,
-    0, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict),
+    UnicodeError_getset, &_PyExc_UnicodeError, 0, 0, 0, offsetof(PyUnicodeErrorObject, dict),
     UnicodeDecodeError_init, 0, BaseException_new,
 };
 PyObject *PyExc_UnicodeDecodeError = (PyObject *)&_PyExc_UnicodeDecodeError;