From 24df4162f1b6b170953a3e7e2f3e70386a6e29ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:27:13 +0200 Subject: [PATCH 01/29] Specify constants for native error policies. --- Python/codecs.c | 49 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 9c0a3fad314cb5..2c2119ffb5b656 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -16,6 +16,27 @@ Copyright (c) Corporation for National Research Initiatives. #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI +#define CODECS_STRICT_ERROR_POLICY 0 +#define CODECS_IGNORE_ERROR_POLICY 1 +#define CODECS_REPLACE_ERROR_POLICY 2 +#define CODECS_XMLCHARREFREPLACE_ERROR_POLICY 3 +#define CODECS_BACKSLASHREPLACE_ERROR_POLICY 4 +#define CODECS_NAMEREPLACE_ERROR_POLICY 5 +#define CODECS_SURROGATEPASS_ERROR_POLICY 6 +#define CODECS_SURROGATEESCAPE_ERROR_POLICY 7 +#define CODECS_ERROR_POLICY_COUNT 8 + +static const char codecs_native_error_polcies[CODECS_ERROR_POLICY_COUNT][32] = { + [CODECS_STRICT_ERROR_POLICY] = "strict", + [CODECS_IGNORE_ERROR_POLICY] = "ignore", + [CODECS_REPLACE_ERROR_POLICY] = "replace", + [CODECS_XMLCHARREFREPLACE_ERROR_POLICY] = "xmlcharrefreplace", + [CODECS_BACKSLASHREPLACE_ERROR_POLICY] = "backslashreplace", + [CODECS_NAMEREPLACE_ERROR_POLICY] = "namereplace", + [CODECS_SURROGATEPASS_ERROR_POLICY] = "surrogatepass", + [CODECS_SURROGATEESCAPE_ERROR_POLICY] = "surrogateescape", +}; + const char *Py_hexdigits = "0123456789abcdef"; /* --- Codec Registry ----------------------------------------------------- */ @@ -1386,12 +1407,12 @@ PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp) { static struct { - const char *name; + int policy; PyMethodDef def; - } methods[] = + } error_handlers[] = { { - "strict", + CODECS_STRICT_ERROR_POLICY, { "strict_errors", 
strict_errors, @@ -1401,7 +1422,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - "ignore", + CODECS_IGNORE_ERROR_POLICY, { "ignore_errors", ignore_errors, @@ -1411,7 +1432,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - "replace", + CODECS_REPLACE_ERROR_POLICY, { "replace_errors", replace_errors, @@ -1421,7 +1442,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - "xmlcharrefreplace", + CODECS_XMLCHARREFREPLACE_ERROR_POLICY, { "xmlcharrefreplace_errors", xmlcharrefreplace_errors, @@ -1432,7 +1453,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - "backslashreplace", + CODECS_BACKSLASHREPLACE_ERROR_POLICY, { "backslashreplace_errors", backslashreplace_errors, @@ -1443,7 +1464,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - "namereplace", + CODECS_NAMEREPLACE_ERROR_POLICY, { "namereplace_errors", namereplace_errors, @@ -1454,7 +1475,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - "surrogatepass", + CODECS_SURROGATEPASS_ERROR_POLICY, { "surrogatepass", surrogatepass_errors, @@ -1462,7 +1483,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - "surrogateescape", + CODECS_SURROGATEESCAPE_ERROR_POLICY, { "surrogateescape", surrogateescape_errors, @@ -1484,14 +1505,14 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) if (interp->codecs.error_registry == NULL) { return PyStatus_NoMemory(); } - for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) { - PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL); + for (size_t i = 0; i < Py_ARRAY_LENGTH(error_handlers); ++i) { + PyObject *func = PyCFunction_NewEx(&error_handlers[i].def, NULL, NULL); if (func == NULL) { return PyStatus_NoMemory(); } - int res = PyDict_SetItemString(interp->codecs.error_registry, - methods[i].name, func); + const char *name = codecs_native_error_polcies[error_handlers[i].policy]; + int res = PyDict_SetItemString(interp->codecs.error_registry, name, func); Py_DECREF(func); if (res < 0) { 
return PyStatus_Error("Failed to insert into codec error registry"); From a65c1b7cad4ba9372145d18a9867b3b127277b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:29:18 +0200 Subject: [PATCH 02/29] Add `PyCodec_UnregisterError` C API function --- Include/codecs.h | 12 ++++++++++++ Python/codecs.c | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/Include/codecs.h b/Include/codecs.h index 512a3c723eca18..8028902da2b02c 100644 --- a/Include/codecs.h +++ b/Include/codecs.h @@ -141,6 +141,18 @@ PyAPI_FUNC(PyObject *) PyCodec_StreamWriter( Return 0 on success, -1 on error */ PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030e0000 +/* Un-register the error handling callback function error under the given + name. Only non-native error handlers can be un-registered. + + - Return -1 and set an exception if 'name' is a native error policy, or + if an error occurred. + - Return 0 if no error handler is associated with the given error policy. + - Return 1 if the error handler was successfully removed. +*/ +PyAPI_FUNC(int) PyCodec_UnregisterError(const char *name); +#endif + /* Lookup the error handling callback function registered under the given name. As a special case NULL can be passed, in which case the error handling callback for "strict" will be returned. 
*/ diff --git a/Python/codecs.c b/Python/codecs.c index 2c2119ffb5b656..b5c36156dd51db 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -639,6 +639,26 @@ int PyCodec_RegisterError(const char *name, PyObject *error) name, error); } +int PyCodec_UnregisterError(const char *name) +{ + for (size_t i = 0; i < CODECS_ERROR_POLICY_COUNT; ++i) { + if (strcmp(name, codecs_native_error_polcies[i]) == 0) { + PyErr_Format(PyExc_ValueError, + "cannot unregister standard error policy '%s'", name); + return -1; + } + } + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp->codecs.initialized); + PyObject *handler = NULL; + if (PyDict_PopString(interp->codecs.error_registry, name, &handler) < 0) { + return -1; + } + int exists = handler == NULL ? 0 : 1; + Py_XDECREF(handler); + return exists; +} + /* Lookup the error handling callback function registered under the name error. As a special case NULL can be passed, in which case the error handling callback for strict encoding will be returned. */ From c1ff9e666452ff2958ceaa62bad40e4ad263b427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:29:36 +0200 Subject: [PATCH 03/29] Expose `PyCodec_UnregisterError` as `codecs.unregister_error` --- Modules/_codecsmodule.c | 21 ++++++++++++++ Modules/clinic/_codecsmodule.c.h | 49 +++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 32373f0799bfeb..09c754aec076a2 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -979,6 +979,26 @@ _codecs_register_error_impl(PyObject *module, const char *errors, Py_RETURN_NONE; } +/*[clinic input] +_codecs.unregister_error -> bool + errors: str + / + +Un-register the specified error handler under the name errors. + +Only custom handlers can be un-registered. 
Returns True if the +handler has been successfully un-registered, and False if the +errors policy is not recognized. An exception is raised when +the errors policy is not allowed. +[clinic start generated code]*/ + +static int +_codecs_unregister_error_impl(PyObject *module, const char *errors) +/*[clinic end generated code: output=d8c1418e402aecdf input=103c9dae16f32e08]*/ +{ + return PyCodec_UnregisterError(errors); +} + /*[clinic input] _codecs.lookup_error name: str @@ -1044,6 +1064,7 @@ static PyMethodDef _codecs_functions[] = { _CODECS_CODE_PAGE_ENCODE_METHODDEF _CODECS_CODE_PAGE_DECODE_METHODDEF _CODECS_REGISTER_ERROR_METHODDEF + _CODECS_UNREGISTER_ERROR_METHODDEF _CODECS_LOOKUP_ERROR_METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h index 1c0f37442ab350..e5a5c03d52d8a1 100644 --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -2683,6 +2683,53 @@ _codecs_register_error(PyObject *module, PyObject *const *args, Py_ssize_t nargs return return_value; } +PyDoc_STRVAR(_codecs_unregister_error__doc__, +"unregister_error($module, errors, /)\n" +"--\n" +"\n" +"Un-register the specified error handler under the name errors.\n" +"\n" +"Only custom handlers can be un-registered. Returns True if the\n" +"handler has been successfully un-registered, and False if the\n" +"errors policy is not recognized. 
An exception is raised when\n" +"the errors policy is not allowed."); + +#define _CODECS_UNREGISTER_ERROR_METHODDEF \ + {"unregister_error", (PyCFunction)_codecs_unregister_error, METH_O, _codecs_unregister_error__doc__}, + +static int +_codecs_unregister_error_impl(PyObject *module, const char *errors); + +static PyObject * +_codecs_unregister_error(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + const char *errors; + int _return_value; + + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("unregister_error", "argument", "str", arg); + goto exit; + } + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(arg, &errors_length); + if (errors == NULL) { + goto exit; + } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } + _return_value = _codecs_unregister_error_impl(module, errors); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(_codecs_lookup_error__doc__, "lookup_error($module, name, /)\n" "--\n" @@ -2746,4 +2793,4 @@ _codecs_lookup_error(PyObject *module, PyObject *arg) #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=e50d5fdf65bd45fa input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d7cb9b22406515a2 input=a9049054013a1b77]*/ From 58370190b365d7039ed92a586eff80d705e457f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:34:39 +0200 Subject: [PATCH 04/29] Update stable ABI files. 
--- Doc/data/refcounts.dat | 3 +++ Doc/data/stable_abi.dat | 1 + Lib/test/test_stable_abi_ctypes.py | 1 + Misc/stable_abi.toml | 2 ++ PC/python3dll.c | 1 + 5 files changed, 8 insertions(+) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 65d48f8bea7de8..705af97c4b9f62 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -318,6 +318,9 @@ PyCodec_RegisterError:int::: PyCodec_RegisterError:const char*:name:: PyCodec_RegisterError:PyObject*:error:+1: +PyCodec_UnregisterError:int::: +PyCodec_UnregisterError:const char*:name:: + PyCodec_LookupError:PyObject*::+1: PyCodec_LookupError:const char*:name:: diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 19dc71a345b474..91c7156ebac4b0 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -87,6 +87,7 @@ func,PyCodec_StreamReader,3.2,, func,PyCodec_StreamWriter,3.2,, func,PyCodec_StrictErrors,3.2,, func,PyCodec_Unregister,3.10,, +func,PyCodec_UnregisterError,3.14,, func,PyCodec_XMLCharRefReplaceErrors,3.2,, func,PyComplex_FromDoubles,3.2,, func,PyComplex_ImagAsDouble,3.2,, diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index d16ad7ef5d4328..97a4bea7af976f 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -128,6 +128,7 @@ def test_windows_feature_macros(self): "PyCodec_StreamWriter", "PyCodec_StrictErrors", "PyCodec_Unregister", + "PyCodec_UnregisterError", "PyCodec_XMLCharRefReplaceErrors", "PyComplex_FromDoubles", "PyComplex_ImagAsDouble", diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index fe0a5e44f8fb15..14a90a334dd79a 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2510,6 +2510,8 @@ added = '3.14' [function.Py_REFCNT] added = '3.14' +[function.PyCodec_UnregisterError] + added = '3.14' [function.PyIter_NextItem] added = '3.14' [function.PyLong_FromInt32] diff --git a/PC/python3dll.c b/PC/python3dll.c index 6b8208ab90bd95..583c9a7b3c10bd 100755 --- 
a/PC/python3dll.c +++ b/PC/python3dll.c @@ -164,6 +164,7 @@ EXPORT_FUNC(PyCodec_StreamReader) EXPORT_FUNC(PyCodec_StreamWriter) EXPORT_FUNC(PyCodec_StrictErrors) EXPORT_FUNC(PyCodec_Unregister) +EXPORT_FUNC(PyCodec_UnregisterError) EXPORT_FUNC(PyCodec_XMLCharRefReplaceErrors) EXPORT_FUNC(PyComplex_FromDoubles) EXPORT_FUNC(PyComplex_ImagAsDouble) From 80d1ceb54b81863fa6ad9d875455994a8e5d61eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:38:38 +0200 Subject: [PATCH 05/29] update ignored.tsv --- Tools/c-analyzer/cpython/ignored.tsv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index f4dc807198a8ef..2a857506867db5 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -345,8 +345,9 @@ Python/ast_opt.c fold_unaryop ops - Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - +Python/codecs.c - codecs_native_error_polcies - Python/codecs.c - ucnhash_capi - -Python/codecs.c _PyCodec_InitRegistry methods - +Python/codecs.c _PyCodec_InitRegistry error_handlers - Python/compile.c - NO_LOCATION - Python/dynload_shlib.c - _PyImport_DynLoadFiletab - Python/dynload_stub.c - _PyImport_DynLoadFiletab - From a134f2300844ce7d8b55c8ba4ab6e65f91e5f136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:46:57 +0200 Subject: [PATCH 06/29] add tests --- Lib/test/test_codeccallbacks.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 4991330489d139..a803a5a132b980 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -1235,6 +1235,31 @@ class FakeUnicodeError(Exception): 
with self.assertRaises((TypeError, FakeUnicodeError)): handler(FakeUnicodeError()) + def test_reject_unregister_native_error_policy(self): + for policy in [ + 'strict', 'ignore', 'replace', 'backslashreplace', 'namereplace', + 'xmlcharrefreplace', 'surrogateescape', 'surrogatepass', + ]: + with self.subTest(f'reject native {policy!r} un-registration'): + self.assertRaises(ValueError, codecs.unregister_error, policy) + + def test_unregister_custom_error_policy(self): + def custom_handler(exc): + raise exc + + custom_name = f'test.test_unregister_error.custom.{id(self)}' + self.assertRaises(LookupError, codecs.lookup_error, custom_name) + codecs.register_error(custom_name, custom_handler) + self.assertIs(codecs.lookup_error(custom_name), custom_handler) + self.assertTrue(codecs.unregister_error(custom_name)) + self.assertRaises(LookupError, codecs.lookup_error, custom_name) + + def test_unregister_custom_unknown_error_policy(self): + unknown_name = f'test.test_unregister_error.custom.{id(self)}.unknown' + self.assertRaises(LookupError, codecs.lookup_error, unknown_name) + self.assertFalse(codecs.unregister_error(unknown_name)) + self.assertRaises(LookupError, codecs.lookup_error, unknown_name) + if __name__ == "__main__": unittest.main() From 47971e965fb8c1c99301da02bb2473e7463ee9c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:06:08 +0200 Subject: [PATCH 07/29] add docs --- Doc/c-api/codec.rst | 16 ++++++++++++++++ Doc/library/codecs.rst | 11 +++++++++++ 2 files changed, 27 insertions(+) diff --git a/Doc/c-api/codec.rst b/Doc/c-api/codec.rst index 8ae5c4fecd6248..ada87c991b2255 100644 --- a/Doc/c-api/codec.rst +++ b/Doc/c-api/codec.rst @@ -97,6 +97,22 @@ Registry API for Unicode encoding error handlers Return ``0`` on success, ``-1`` on error. +.. 
c:function:: int PyCodec_UnregisterError(const char *name) + + Un-register the error handling callback function error under the given error + policy *name*. :ref:`Standard error policies ` cannot be + un-registered. + + * Return ``-1`` and set a :exc:`ValueError` exception if *name* is a + standard error policy or if an error occurred (in which case the + existing exception is propagated). + + * Return ``0`` if no error handler is associated with the given error policy. + + * Return ``1`` if the error handler was successfully removed. + + .. versionadded:: 3.14 + .. c:function:: PyObject* PyCodec_LookupError(const char *name) Lookup the error handling callback function registered under *name*. As a diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 2cfd8a1eaee806..efa4ff9e1fd374 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -439,6 +439,17 @@ handler: replacement from the error handler will be put into the output directly. +.. function:: unregister_error(name) + + Attempt to un-register the error handling function under the name *name*. + + This raises a :exc:`ValueError` if *name* denotes a standard error policy + as specified above. Otherwise, this returns ``True`` if an error handler + existed for the given *name* and ``False`` otherwise. + + .. versionadded:: 3.14 + + Previously registered error handlers (including the standard error handlers) can be looked up by name: From c00ad8db26f19b4d073eaf9294dd17df7996d62c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:08:28 +0200 Subject: [PATCH 08/29] add What's New entry --- Doc/whatsnew/3.14.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 6875c4c909b3c7..5fd987788850dc 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -216,6 +216,13 @@ ast (Contributed by Irit Katriel in :gh:`123958`.) 
+codecs +------ + +* Added :func:`codecs.unregister_error` to un-register custom error handlers. + (Contributed by Bénédikt Tran in :gh:`124665`.) + + ctypes ------ @@ -672,6 +679,10 @@ New Features `__ mentioned in :pep:`630` (:gh:`124153`). +* Add :c:func:`PyCodec_UnregisterError` to the limited C API to un-register + custom error handlers. + (Contributed by Bénédikt Tran in :gh:`124665`.) + Porting to Python 3.14 ---------------------- From cf3bc2f46c213753887f12999182bd6b3987581f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:09:56 +0200 Subject: [PATCH 09/29] blurb --- .../next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst | 2 ++ .../next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst create mode 100644 Misc/NEWS.d/next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst diff --git a/Misc/NEWS.d/next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst b/Misc/NEWS.d/next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst new file mode 100644 index 00000000000000..8944e5aaf6537f --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyCodec_UnregisterError` to the limited C API to un-register +custom error handlers. Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst b/Misc/NEWS.d/next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst new file mode 100644 index 00000000000000..75912c81ffd9d6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst @@ -0,0 +1,2 @@ +Added :func:`codecs.unregister_error` to un-register custom error handlers. +Patch by Bénédikt Tran. 
From 21083085f8beadc2248d04957499be4ed5683315 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:36:56 +0200 Subject: [PATCH 10/29] docs fix --- Doc/c-api/codec.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/c-api/codec.rst b/Doc/c-api/codec.rst index ada87c991b2255..c60242cdbe84c7 100644 --- a/Doc/c-api/codec.rst +++ b/Doc/c-api/codec.rst @@ -100,7 +100,7 @@ Registry API for Unicode encoding error handlers .. c:function:: int PyCodec_UnregisterError(const char *name) Un-register the error handling callback function error under the given error - policy *name*. :ref:`Standard error policies ` cannot be + policy *name*. :ref:`Standard error policies ` cannot be un-registered. * Return ``-1`` and set a :exc:`ValueError` exception if *name* is a From 1036516e49a828cedbdbff68433302865851d85e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:43:33 +0200 Subject: [PATCH 11/29] simplify logic --- Python/codecs.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index b5c36156dd51db..5ea4c58c07653f 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -650,13 +650,7 @@ int PyCodec_UnregisterError(const char *name) } PyInterpreterState *interp = _PyInterpreterState_GET(); assert(interp->codecs.initialized); - PyObject *handler = NULL; - if (PyDict_PopString(interp->codecs.error_registry, name, &handler) < 0) { - return -1; - } - int exists = handler == NULL ? 
0 : 1; - Py_XDECREF(handler); - return exists; + return PyDict_PopString(interp->codecs.error_registry, name, NULL); } /* Lookup the error handling callback function registered under the From ae601fefc572a4c4d72133f12cb5ad5515790d0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:45:15 +0200 Subject: [PATCH 12/29] nit: error message standardization --- Python/codecs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index 5ea4c58c07653f..17420e52b63299 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -644,7 +644,7 @@ int PyCodec_UnregisterError(const char *name) for (size_t i = 0; i < CODECS_ERROR_POLICY_COUNT; ++i) { if (strcmp(name, codecs_native_error_polcies[i]) == 0) { PyErr_Format(PyExc_ValueError, - "cannot unregister standard error policy '%s'", name); + "cannot un-register standard error policy '%s'", name); return -1; } } From 864c1ab80e118917cd12d73efe1de1934312c654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:25:41 +0200 Subject: [PATCH 13/29] Make it a private utility for now. --- Include/codecs.h | 12 ------------ Include/internal/pycore_codecs.h | 11 +++++++++++ Modules/_codecsmodule.c | 10 +++++----- Modules/clinic/_codecsmodule.c.h | 18 +++++++++--------- Python/codecs.c | 2 +- 5 files changed, 26 insertions(+), 27 deletions(-) diff --git a/Include/codecs.h b/Include/codecs.h index 8028902da2b02c..512a3c723eca18 100644 --- a/Include/codecs.h +++ b/Include/codecs.h @@ -141,18 +141,6 @@ PyAPI_FUNC(PyObject *) PyCodec_StreamWriter( Return 0 on success, -1 on error */ PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error); -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030e0000 -/* Un-register the error handling callback function error under the given - name. 
Only non-native error handlers can be un-registered. - - - Return -1 and set an exception if 'name' is a native error policy, or - if an error occurred. - - Return 0 if no error handler is associated with the given error policy. - - Return 1 if the error handler was successfully removed. -*/ -PyAPI_FUNC(int) PyCodec_UnregisterError(const char *name); -#endif - /* Lookup the error handling callback function registered under the given name. As a special case NULL can be passed, in which case the error handling callback for "strict" will be returned. */ diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index 5e2d5c5ce9d868..3e7c053f765198 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -21,6 +21,17 @@ extern void _PyCodec_Fini(PyInterpreterState *interp); extern PyObject* _PyCodec_Lookup(const char *encoding); +/* + * Un-register the error handling callback function registered under the + * given name. Only non-standard error handlers can be un-registered. + * + * - Return -1 and set an exception if 'name' is a standard error policy, + * or if an error occurred. + * - Return 0 if no error handler is associated with the given error policy. + * - Return 1 if the error handler was successfully removed. + */ +extern int _PyCodec_UnregisterError(const char *name); + /* Text codec specific encoding and decoding API. Checks the encoding against a list of codecs which do not diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 09c754aec076a2..426af21d4b1eec 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -980,7 +980,7 @@ _codecs_register_error_impl(PyObject *module, const char *errors, } /*[clinic input] -_codecs.unregister_error -> bool +_codecs._unregister_error -> bool errors: str / @@ -993,10 +993,10 @@ the errors policy is not allowed.
[clinic start generated code]*/ static int -_codecs_unregister_error_impl(PyObject *module, const char *errors) -/*[clinic end generated code: output=d8c1418e402aecdf input=103c9dae16f32e08]*/ +_codecs__unregister_error_impl(PyObject *module, const char *errors) +/*[clinic end generated code: output=28c22be667465503 input=aa488f7d1f308c8c]*/ { - return PyCodec_UnregisterError(errors); + return _PyCodec_UnregisterError(errors); } /*[clinic input] @@ -1064,7 +1064,7 @@ static PyMethodDef _codecs_functions[] = { _CODECS_CODE_PAGE_ENCODE_METHODDEF _CODECS_CODE_PAGE_DECODE_METHODDEF _CODECS_REGISTER_ERROR_METHODDEF - _CODECS_UNREGISTER_ERROR_METHODDEF + _CODECS__UNREGISTER_ERROR_METHODDEF _CODECS_LOOKUP_ERROR_METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h index e5a5c03d52d8a1..fa4e0a94b9df34 100644 --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -2683,8 +2683,8 @@ _codecs_register_error(PyObject *module, PyObject *const *args, Py_ssize_t nargs return return_value; } -PyDoc_STRVAR(_codecs_unregister_error__doc__, -"unregister_error($module, errors, /)\n" +PyDoc_STRVAR(_codecs__unregister_error__doc__, +"_unregister_error($module, errors, /)\n" "--\n" "\n" "Un-register the specified error handler under the name errors.\n" @@ -2694,21 +2694,21 @@ PyDoc_STRVAR(_codecs_unregister_error__doc__, "errors policy is not recognized. 
An exception is raised when\n" "the errors policy is not allowed."); -#define _CODECS_UNREGISTER_ERROR_METHODDEF \ - {"unregister_error", (PyCFunction)_codecs_unregister_error, METH_O, _codecs_unregister_error__doc__}, +#define _CODECS__UNREGISTER_ERROR_METHODDEF \ + {"_unregister_error", (PyCFunction)_codecs__unregister_error, METH_O, _codecs__unregister_error__doc__}, static int -_codecs_unregister_error_impl(PyObject *module, const char *errors); +_codecs__unregister_error_impl(PyObject *module, const char *errors); static PyObject * -_codecs_unregister_error(PyObject *module, PyObject *arg) +_codecs__unregister_error(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; const char *errors; int _return_value; if (!PyUnicode_Check(arg)) { - _PyArg_BadArgument("unregister_error", "argument", "str", arg); + _PyArg_BadArgument("_unregister_error", "argument", "str", arg); goto exit; } Py_ssize_t errors_length; @@ -2720,7 +2720,7 @@ _codecs_unregister_error(PyObject *module, PyObject *arg) PyErr_SetString(PyExc_ValueError, "embedded null character"); goto exit; } - _return_value = _codecs_unregister_error_impl(module, errors); + _return_value = _codecs__unregister_error_impl(module, errors); if ((_return_value == -1) && PyErr_Occurred()) { goto exit; } @@ -2793,4 +2793,4 @@ _codecs_lookup_error(PyObject *module, PyObject *arg) #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=d7cb9b22406515a2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ca479a8de1f17423 input=a9049054013a1b77]*/ diff --git a/Python/codecs.c b/Python/codecs.c index 17420e52b63299..48c95cc90eb52d 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -639,7 +639,7 @@ int PyCodec_RegisterError(const char *name, PyObject *error) name, error); } -int PyCodec_UnregisterError(const char *name) +int _PyCodec_UnregisterError(const char *name) { for 
(size_t i = 0; i < CODECS_ERROR_POLICY_COUNT; ++i) { if (strcmp(name, codecs_native_error_polcies[i]) == 0) { From 3d9ff154421ce52501677bff0412ec3f21ab41d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:27:00 +0200 Subject: [PATCH 14/29] Revert "docs fix" This reverts commit 21083085f8beadc2248d04957499be4ed5683315. --- Doc/c-api/codec.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/c-api/codec.rst b/Doc/c-api/codec.rst index c60242cdbe84c7..ada87c991b2255 100644 --- a/Doc/c-api/codec.rst +++ b/Doc/c-api/codec.rst @@ -100,7 +100,7 @@ Registry API for Unicode encoding error handlers .. c:function:: int PyCodec_UnregisterError(const char *name) Un-register the error handling callback function error under the given error - policy *name*. :ref:`Standard error policies ` cannot be + policy *name*. :ref:`Standard error policies ` cannot be un-registered. * Return ``-1`` and set a :exc:`ValueError` exception if *name* is a From dbf0d5bf32d83caf42def76f867c048fd4edc648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:27:02 +0200 Subject: [PATCH 15/29] Revert "blurb" This reverts commit cf3bc2f46c213753887f12999182bd6b3987581f. 
--- .../next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst | 2 -- .../next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst | 2 -- 2 files changed, 4 deletions(-) delete mode 100644 Misc/NEWS.d/next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst delete mode 100644 Misc/NEWS.d/next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst diff --git a/Misc/NEWS.d/next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst b/Misc/NEWS.d/next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst deleted file mode 100644 index 8944e5aaf6537f..00000000000000 --- a/Misc/NEWS.d/next/C_API/2024-09-27-17-09-49.gh-issue-124665.KW2F8D.rst +++ /dev/null @@ -1,2 +0,0 @@ -Add :c:func:`PyCodec_UnregisterError` to the limited C API to un-register -custom error handlers. Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst b/Misc/NEWS.d/next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst deleted file mode 100644 index 75912c81ffd9d6..00000000000000 --- a/Misc/NEWS.d/next/Library/2024-09-27-17-09-25.gh-issue-124665.qiMUai.rst +++ /dev/null @@ -1,2 +0,0 @@ -Added :func:`codecs.unregister_error` to un-register custom error handlers. -Patch by Bénédikt Tran. From 5ba676c4111b1ef01232e6a90d8d04c36d9c7067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:27:11 +0200 Subject: [PATCH 16/29] Revert "add What's New entry" This reverts commit c00ad8db26f19b4d073eaf9294dd17df7996d62c. --- Doc/whatsnew/3.14.rst | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5fd987788850dc..6875c4c909b3c7 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -216,13 +216,6 @@ ast (Contributed by Irit Katriel in :gh:`123958`.) -codecs ------- - -* Added :func:`codecs.unregister_error` to un-register custom error handlers. - (Contributed by Bénédikt Tran in :gh:`124665`.) 
- - ctypes ------ @@ -679,10 +672,6 @@ New Features `__ mentioned in :pep:`630` (:gh:`124153`). -* Add :c:func:`PyCodec_UnregisterError` to the limited C API to un-register - custom error handlers. - (Contributed by Bénédikt Tran in :gh:`124665`.) - Porting to Python 3.14 ---------------------- From 8a0a0a91e80fe607db22af29af663ba13c19000e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:27:15 +0200 Subject: [PATCH 17/29] Revert "add docs" This reverts commit 47971e965fb8c1c99301da02bb2473e7463ee9c6. --- Doc/c-api/codec.rst | 16 ---------------- Doc/library/codecs.rst | 11 ----------- 2 files changed, 27 deletions(-) diff --git a/Doc/c-api/codec.rst b/Doc/c-api/codec.rst index ada87c991b2255..8ae5c4fecd6248 100644 --- a/Doc/c-api/codec.rst +++ b/Doc/c-api/codec.rst @@ -97,22 +97,6 @@ Registry API for Unicode encoding error handlers Return ``0`` on success, ``-1`` on error. -.. c:function:: int PyCodec_UnregisterError(const char *name) - - Un-register the error handling callback function error under the given error - policy *name*. :ref:`Standard error policies ` cannot be - un-registered. - - * Return ``-1`` and set a :exc:`ValueError` exception if *name* is a - standard error policy or if an error occurred (in which case the - existing exception is propagated). - - * Return ``0`` if no error handler is associated with the given error policy. - - * Return ``1`` if the error handler was successfully removed. - - .. versionadded:: 3.14 - .. c:function:: PyObject* PyCodec_LookupError(const char *name) Lookup the error handling callback function registered under *name*. As a diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index efa4ff9e1fd374..2cfd8a1eaee806 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -439,17 +439,6 @@ handler: replacement from the error handler will be put into the output directly. -.. 
function:: unregister_error(name) - - Attempt to un-register the error handling function under the name *name*. - - This raises a :exc:`ValueError` if *name* denotes a standard error policy - as specified above. Otherwise, this returns ``True`` if an error handler - existed for the given *name* and ``False`` otherwise. - - .. versionadded:: 3.14 - - Previously registered error handlers (including the standard error handlers) can be looked up by name: From 7915fa1bca4b577da4724b1df5cabe9c8f9e7048 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:28:13 +0200 Subject: [PATCH 18/29] Revert "Update stable ABI files." This reverts commit 58370190b365d7039ed92a586eff80d705e457f1. --- Doc/data/refcounts.dat | 3 --- Doc/data/stable_abi.dat | 1 - Lib/test/test_stable_abi_ctypes.py | 1 - Misc/stable_abi.toml | 2 -- PC/python3dll.c | 1 - 5 files changed, 8 deletions(-) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 705af97c4b9f62..65d48f8bea7de8 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -318,9 +318,6 @@ PyCodec_RegisterError:int::: PyCodec_RegisterError:const char*:name:: PyCodec_RegisterError:PyObject*:error:+1: -PyCodec_UnregisterError:int::: -PyCodec_UnregisterError:const char*:name:: - PyCodec_LookupError:PyObject*::+1: PyCodec_LookupError:const char*:name:: diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 91c7156ebac4b0..19dc71a345b474 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -87,7 +87,6 @@ func,PyCodec_StreamReader,3.2,, func,PyCodec_StreamWriter,3.2,, func,PyCodec_StrictErrors,3.2,, func,PyCodec_Unregister,3.10,, -func,PyCodec_UnregisterError,3.14,, func,PyCodec_XMLCharRefReplaceErrors,3.2,, func,PyComplex_FromDoubles,3.2,, func,PyComplex_ImagAsDouble,3.2,, diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 97a4bea7af976f..d16ad7ef5d4328 100644 --- 
a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -128,7 +128,6 @@ def test_windows_feature_macros(self): "PyCodec_StreamWriter", "PyCodec_StrictErrors", "PyCodec_Unregister", - "PyCodec_UnregisterError", "PyCodec_XMLCharRefReplaceErrors", "PyComplex_FromDoubles", "PyComplex_ImagAsDouble", diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 14a90a334dd79a..fe0a5e44f8fb15 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2510,8 +2510,6 @@ added = '3.14' [function.Py_REFCNT] added = '3.14' -[function.PyCodec_UnregisterError] - added = '3.14' [function.PyIter_NextItem] added = '3.14' [function.PyLong_FromInt32] diff --git a/PC/python3dll.c b/PC/python3dll.c index 583c9a7b3c10bd..6b8208ab90bd95 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -164,7 +164,6 @@ EXPORT_FUNC(PyCodec_StreamReader) EXPORT_FUNC(PyCodec_StreamWriter) EXPORT_FUNC(PyCodec_StrictErrors) EXPORT_FUNC(PyCodec_Unregister) -EXPORT_FUNC(PyCodec_UnregisterError) EXPORT_FUNC(PyCodec_XMLCharRefReplaceErrors) EXPORT_FUNC(PyComplex_FromDoubles) EXPORT_FUNC(PyComplex_ImagAsDouble) From 8eaa147d2678cdaec4d3e6fdc7358d8f84b39b8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:33:33 +0200 Subject: [PATCH 19/29] simplify array logic --- Python/codecs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 48c95cc90eb52d..d6d4f528879896 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -24,9 +24,8 @@ Copyright (c) Corporation for National Research Initiatives. 
#define CODECS_NAMEREPLACE_ERROR_POLICY 5 #define CODECS_SURROGATEPASS_ERROR_POLICY 6 #define CODECS_SURROGATEESCAPE_ERROR_POLICY 7 -#define CODECS_ERROR_POLICY_COUNT 8 -static const char codecs_native_error_polcies[CODECS_ERROR_POLICY_COUNT][32] = { +static const char *codecs_native_error_polcies[] = { [CODECS_STRICT_ERROR_POLICY] = "strict", [CODECS_IGNORE_ERROR_POLICY] = "ignore", [CODECS_REPLACE_ERROR_POLICY] = "replace", @@ -641,7 +640,7 @@ int PyCodec_RegisterError(const char *name, PyObject *error) int _PyCodec_UnregisterError(const char *name) { - for (size_t i = 0; i < CODECS_ERROR_POLICY_COUNT; ++i) { + for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_native_error_polcies); ++i) { if (strcmp(name, codecs_native_error_polcies[i]) == 0) { PyErr_Format(PyExc_ValueError, "cannot un-register standard error policy '%s'", name); From 7f41aecf66d5ede20800b2235c1ddba64e4c19ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:34:54 +0200 Subject: [PATCH 20/29] update tests --- Lib/test/test_codeccallbacks.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index a803a5a132b980..f3793b811f858c 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -1210,7 +1210,6 @@ def replace_with_long(exc): '\ufffd\x00\x00' ) - def test_fake_error_class(self): handlers = [ codecs.strict_errors, @@ -1241,7 +1240,7 @@ def test_reject_unregister_native_error_policy(self): 'xmlcharrefreplace', 'surrogateescape', 'surrogatepass', ]: with self.subTest(f'reject native {policy!r} un-registration'): - self.assertRaises(ValueError, codecs.unregister_error, policy) + self.assertRaises(ValueError, codecs._unregister_error, policy) def test_unregister_custom_error_policy(self): def custom_handler(exc): @@ -1251,13 +1250,13 @@ def custom_handler(exc): self.assertRaises(LookupError, 
codecs.lookup_error, custom_name) codecs.register_error(custom_name, custom_handler) self.assertIs(codecs.lookup_error(custom_name), custom_handler) - self.assertTrue(codecs.unregister_error(custom_name)) + self.assertTrue(codecs._unregister_error(custom_name)) self.assertRaises(LookupError, codecs.lookup_error, custom_name) def test_unregister_custom_unknown_error_policy(self): unknown_name = f'test.test_unregister_error.custom.{id(self)}.unknown' self.assertRaises(LookupError, codecs.lookup_error, unknown_name) - self.assertFalse(codecs.unregister_error(unknown_name)) + self.assertFalse(codecs._unregister_error(unknown_name)) self.assertRaises(LookupError, codecs.lookup_error, unknown_name) From 988b34d0447a43ed730780c93afd7ac16b0c7574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 11:23:20 +0200 Subject: [PATCH 21/29] fix tests --- Lib/test/test_codeccallbacks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index f3793b811f858c..78e671d5da3de0 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -1,3 +1,4 @@ +from _codecs import _unregister_error as _codecs_unregister_error import codecs import html.entities import itertools @@ -1240,7 +1241,7 @@ def test_reject_unregister_native_error_policy(self): 'xmlcharrefreplace', 'surrogateescape', 'surrogatepass', ]: with self.subTest(f'reject native {policy!r} un-registration'): - self.assertRaises(ValueError, codecs._unregister_error, policy) + self.assertRaises(ValueError, _codecs_unregister_error, policy) def test_unregister_custom_error_policy(self): def custom_handler(exc): @@ -1250,13 +1251,13 @@ def custom_handler(exc): self.assertRaises(LookupError, codecs.lookup_error, custom_name) codecs.register_error(custom_name, custom_handler) self.assertIs(codecs.lookup_error(custom_name), custom_handler) - 
self.assertTrue(codecs._unregister_error(custom_name)) + self.assertTrue(_codecs_unregister_error(custom_name)) self.assertRaises(LookupError, codecs.lookup_error, custom_name) def test_unregister_custom_unknown_error_policy(self): unknown_name = f'test.test_unregister_error.custom.{id(self)}.unknown' self.assertRaises(LookupError, codecs.lookup_error, unknown_name) - self.assertFalse(codecs._unregister_error(unknown_name)) + self.assertFalse(_codecs_unregister_error(unknown_name)) self.assertRaises(LookupError, codecs.lookup_error, unknown_name) From c6e1d98c7379c6941acdbf4fc017e8f40133cfdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 12:46:02 +0200 Subject: [PATCH 22/29] Duplicate standard error policies to avoid indirection. --- Python/codecs.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index d6d4f528879896..ea24463218db3f 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -16,24 +16,10 @@ Copyright (c) Corporation for National Research Initiatives. 
#include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI -#define CODECS_STRICT_ERROR_POLICY 0 -#define CODECS_IGNORE_ERROR_POLICY 1 -#define CODECS_REPLACE_ERROR_POLICY 2 -#define CODECS_XMLCHARREFREPLACE_ERROR_POLICY 3 -#define CODECS_BACKSLASHREPLACE_ERROR_POLICY 4 -#define CODECS_NAMEREPLACE_ERROR_POLICY 5 -#define CODECS_SURROGATEPASS_ERROR_POLICY 6 -#define CODECS_SURROGATEESCAPE_ERROR_POLICY 7 - static const char *codecs_native_error_polcies[] = { - [CODECS_STRICT_ERROR_POLICY] = "strict", - [CODECS_IGNORE_ERROR_POLICY] = "ignore", - [CODECS_REPLACE_ERROR_POLICY] = "replace", - [CODECS_XMLCHARREFREPLACE_ERROR_POLICY] = "xmlcharrefreplace", - [CODECS_BACKSLASHREPLACE_ERROR_POLICY] = "backslashreplace", - [CODECS_NAMEREPLACE_ERROR_POLICY] = "namereplace", - [CODECS_SURROGATEPASS_ERROR_POLICY] = "surrogatepass", - [CODECS_SURROGATEESCAPE_ERROR_POLICY] = "surrogateescape", + "strict", "ignore", "replace", + "xmlcharrefreplace", "backslashreplace", "namereplace", + "surrogatepass", "surrogateescape", }; const char *Py_hexdigits = "0123456789abcdef"; @@ -1420,12 +1406,12 @@ PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp) { static struct { - int policy; + const char *name; PyMethodDef def; } error_handlers[] = { { - CODECS_STRICT_ERROR_POLICY, + "strict", { "strict_errors", strict_errors, @@ -1435,7 +1421,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - CODECS_IGNORE_ERROR_POLICY, + "ignore", { "ignore_errors", ignore_errors, @@ -1445,7 +1431,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - CODECS_REPLACE_ERROR_POLICY, + "replace", { "replace_errors", replace_errors, @@ -1455,7 +1441,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - CODECS_XMLCHARREFREPLACE_ERROR_POLICY, + "xmlcharrefreplace", { "xmlcharrefreplace_errors", xmlcharrefreplace_errors, @@ -1466,7 +1452,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - 
CODECS_BACKSLASHREPLACE_ERROR_POLICY, + "backslashreplace", { "backslashreplace_errors", backslashreplace_errors, @@ -1477,7 +1463,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - CODECS_NAMEREPLACE_ERROR_POLICY, + "namereplace", { "namereplace_errors", namereplace_errors, @@ -1488,7 +1474,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - CODECS_SURROGATEPASS_ERROR_POLICY, + "surrogatepass", { "surrogatepass", surrogatepass_errors, @@ -1496,7 +1482,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } }, { - CODECS_SURROGATEESCAPE_ERROR_POLICY, + "surrogateescape", { "surrogateescape", surrogateescape_errors, @@ -1524,8 +1510,7 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) return PyStatus_NoMemory(); } - const char *name = codecs_native_error_polcies[error_handlers[i].policy]; - int res = PyDict_SetItemString(interp->codecs.error_registry, name, func); + int res = PyDict_SetItemString(interp->codecs.error_registry, error_handlers[i].name, func); Py_DECREF(func); if (res < 0) { return PyStatus_Error("Failed to insert into codec error registry"); From dd6d2105e752a229edd9d67261d6a2e2f605f2f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 12:46:42 +0200 Subject: [PATCH 23/29] standardize names --- Python/codecs.c | 6 +++--- Tools/c-analyzer/cpython/ignored.tsv | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index ea24463218db3f..ffae47061d4bc0 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -16,7 +16,7 @@ Copyright (c) Corporation for National Research Initiatives. 
#include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI -static const char *codecs_native_error_polcies[] = { +static const char *codecs_standard_error_polcies[] = { "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace", "namereplace", "surrogatepass", "surrogateescape", @@ -626,8 +626,8 @@ int PyCodec_RegisterError(const char *name, PyObject *error) int _PyCodec_UnregisterError(const char *name) { - for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_native_error_polcies); ++i) { - if (strcmp(name, codecs_native_error_polcies[i]) == 0) { + for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_standard_error_polcies); ++i) { + if (strcmp(name, codecs_standard_error_polcies[i]) == 0) { PyErr_Format(PyExc_ValueError, "cannot un-register standard error policy '%s'", name); return -1; diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 2a857506867db5..f58e16dd751770 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -345,7 +345,7 @@ Python/ast_opt.c fold_unaryop ops - Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - -Python/codecs.c - codecs_native_error_polcies - +Python/codecs.c - codecs_standard_error_polcies - Python/codecs.c - ucnhash_capi - Python/codecs.c _PyCodec_InitRegistry error_handlers - Python/compile.c - NO_LOCATION - From b68e54eba55fddd54cafc03db80681e50f006cf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 12:47:21 +0200 Subject: [PATCH 24/29] PEP 7 --- Python/codecs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index ffae47061d4bc0..d059b944b0b901 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1510,7 +1510,8 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) return 
PyStatus_NoMemory(); } - int res = PyDict_SetItemString(interp->codecs.error_registry, error_handlers[i].name, func); + int res = PyDict_SetItemString(interp->codecs.error_registry, + error_handlers[i].name, func); Py_DECREF(func); if (res < 0) { return PyStatus_Error("Failed to insert into codec error registry"); From 2c528fe81fd0e4733b6f11a60c0f0c92857f09e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 12:48:46 +0200 Subject: [PATCH 25/29] standardize names --- Lib/test/test_codeccallbacks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 78e671d5da3de0..f095cdd314d7bf 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -1235,12 +1235,12 @@ class FakeUnicodeError(Exception): with self.assertRaises((TypeError, FakeUnicodeError)): handler(FakeUnicodeError()) - def test_reject_unregister_native_error_policy(self): + def test_reject_unregister_standard_error_policy(self): for policy in [ 'strict', 'ignore', 'replace', 'backslashreplace', 'namereplace', 'xmlcharrefreplace', 'surrogateescape', 'surrogatepass', ]: - with self.subTest(f'reject native {policy!r} un-registration'): + with self.subTest(policy): self.assertRaises(ValueError, _codecs_unregister_error, policy) def test_unregister_custom_error_policy(self): From c95cd65234bb3a83529425e9e1673f1b0e04809d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 12:51:41 +0200 Subject: [PATCH 26/29] revert renaming as well --- Python/codecs.c | 8 ++++---- Tools/c-analyzer/cpython/ignored.tsv | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index d059b944b0b901..ac39036b189cfb 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1408,7 +1408,7 @@ 
_PyCodec_InitRegistry(PyInterpreterState *interp) static struct { const char *name; PyMethodDef def; - } error_handlers[] = + } methods[] = { { "strict", @@ -1504,14 +1504,14 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) if (interp->codecs.error_registry == NULL) { return PyStatus_NoMemory(); } - for (size_t i = 0; i < Py_ARRAY_LENGTH(error_handlers); ++i) { - PyObject *func = PyCFunction_NewEx(&error_handlers[i].def, NULL, NULL); + for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) { + PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL); if (func == NULL) { return PyStatus_NoMemory(); } int res = PyDict_SetItemString(interp->codecs.error_registry, - error_handlers[i].name, func); + methods[i].name, func); Py_DECREF(func); if (res < 0) { return PyStatus_Error("Failed to insert into codec error registry"); diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index f58e16dd751770..10ec3bd156e846 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -347,7 +347,7 @@ Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - Python/codecs.c - codecs_standard_error_polcies - Python/codecs.c - ucnhash_capi - -Python/codecs.c _PyCodec_InitRegistry error_handlers - +Python/codecs.c _PyCodec_InitRegistry methods - Python/compile.c - NO_LOCATION - Python/dynload_shlib.c - _PyImport_DynLoadFiletab - Python/dynload_stub.c - _PyImport_DynLoadFiletab - From 101eb76372a403b15eb1f9c3b50c64544253eb8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 15:18:16 +0200 Subject: [PATCH 27/29] use 'built-in' instead of 'standard' terminology --- Include/internal/pycore_codecs.h | 4 ++-- Lib/test/test_codeccallbacks.py | 13 ++++++++++--- Modules/_codecsmodule.c | 14 ++++++++------ Modules/clinic/_codecsmodule.c.h | 14 ++++++++------ Python/codecs.c | 8 ++++---- 
Tools/c-analyzer/cpython/ignored.tsv | 2 +- 6 files changed, 33 insertions(+), 22 deletions(-) diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index 3e7c053f765198..802bca253962ff 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -23,9 +23,9 @@ extern PyObject* _PyCodec_Lookup(const char *encoding); /* * Un-register the error handling callback function error under the given - * name. Only non-standard error handlers can be un-registered. + * name. Only non-built-in error handlers can be un-registered. * - * - Return -1 and set an exception if 'name' is a standard error policy, + * - Return -1 and set an exception if 'name' is a built-in error policy, * or if an error occurred. * - Return 0 if no error handler is associated with the given error policy. * - Return 1 if the error handler was successfully removed. diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index f095cdd314d7bf..736a9f17785aae 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -2,6 +2,7 @@ import codecs import html.entities import itertools +import os import sys import unicodedata import unittest @@ -1235,7 +1236,7 @@ class FakeUnicodeError(Exception): with self.assertRaises((TypeError, FakeUnicodeError)): handler(FakeUnicodeError()) - def test_reject_unregister_standard_error_policy(self): + def test_reject_unregister_builtin_error_policy(self): for policy in [ 'strict', 'ignore', 'replace', 'backslashreplace', 'namereplace', 'xmlcharrefreplace', 'surrogateescape', 'surrogatepass', @@ -1247,7 +1248,10 @@ def test_unregister_custom_error_policy(self): def custom_handler(exc): raise exc - custom_name = f'test.test_unregister_error.custom.{id(self)}' + # We want a unique ID in case the test is executed multiple times + # to be sure that we always try to un-register a new error policy. 
+ unique_id = int.from_bytes(os.urandom(8)) + custom_name = f'test.test_unregister_error.custom.{unique_id}' self.assertRaises(LookupError, codecs.lookup_error, custom_name) codecs.register_error(custom_name, custom_handler) self.assertIs(codecs.lookup_error(custom_name), custom_handler) @@ -1255,7 +1259,10 @@ def custom_handler(exc): self.assertRaises(LookupError, codecs.lookup_error, custom_name) def test_unregister_custom_unknown_error_policy(self): - unknown_name = f'test.test_unregister_error.custom.{id(self)}.unknown' + # We want a unique ID in case the test is executed multiple times + # to be sure that we always try to un-register an unknown error policy. + unique_id = int.from_bytes(os.urandom(8)) + unknown_name = f'test.test_unregister_error.custom.unknown.{unique_id}' self.assertRaises(LookupError, codecs.lookup_error, unknown_name) self.assertFalse(_codecs_unregister_error(unknown_name)) self.assertRaises(LookupError, codecs.lookup_error, unknown_name) diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 426af21d4b1eec..593e4cb2f966e6 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -984,17 +984,19 @@ _codecs._unregister_error -> bool errors: str / -Un-register the specified error handler under the name errors. +Un-register the specified error handler under the name 'errors'. -Only custom handlers can be un-registered. Returns True if the -handler has been successfully un-registered, and False if the -errors policy is not recognized. An exception is raised when -the errors policy is not allowed. +Only custom error handlers can be un-registered. Returns True +if the handler has been successfully un-registered, and False +if the error policy is not registered. + +An exception is raised when the error policy is a built-in one +or if an error occurred. 
[clinic start generated code]*/ static int _codecs__unregister_error_impl(PyObject *module, const char *errors) -/*[clinic end generated code: output=28c22be667465503 input=aa488f7d1f308c8c]*/ +/*[clinic end generated code: output=28c22be667465503 input=b5fb3325eea01278]*/ { return _PyCodec_UnregisterError(errors); } diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h index fa4e0a94b9df34..96c3655b7d82de 100644 --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -2687,12 +2687,14 @@ PyDoc_STRVAR(_codecs__unregister_error__doc__, "_unregister_error($module, errors, /)\n" "--\n" "\n" -"Un-register the specified error handler under the name errors.\n" +"Un-register the specified error handler under the name \'errors\'.\n" "\n" -"Only custom handlers can be un-registered. Returns True if the\n" -"handler has been successfully un-registered, and False if the\n" -"errors policy is not recognized. An exception is raised when\n" -"the errors policy is not allowed."); +"Only custom error handlers can be un-registered. 
Returns True\n" +"if the handler has been successfully un-registered, and False\n" +"if the error policy is not registered.\n" +"\n" +"An exception is raised when the error policy is a built-in one\n" +"or if an error occurred."); #define _CODECS__UNREGISTER_ERROR_METHODDEF \ {"_unregister_error", (PyCFunction)_codecs__unregister_error, METH_O, _codecs__unregister_error__doc__}, @@ -2793,4 +2795,4 @@ _codecs_lookup_error(PyObject *module, PyObject *arg) #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=ca479a8de1f17423 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b0d112761927032f input=a9049054013a1b77]*/ diff --git a/Python/codecs.c b/Python/codecs.c index ac39036b189cfb..3ee9efd588f797 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -16,7 +16,7 @@ Copyright (c) Corporation for National Research Initiatives. #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI -static const char *codecs_standard_error_polcies[] = { +static const char *codecs_builtin_error_policies[] = { "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace", "namereplace", "surrogatepass", "surrogateescape", @@ -626,10 +626,10 @@ int PyCodec_RegisterError(const char *name, PyObject *error) int _PyCodec_UnregisterError(const char *name) { - for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_standard_error_polcies); ++i) { - if (strcmp(name, codecs_standard_error_polcies[i]) == 0) { + for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_builtin_error_policies); ++i) { + if (strcmp(name, codecs_builtin_error_policies[i]) == 0) { PyErr_Format(PyExc_ValueError, - "cannot un-register standard error policy '%s'", name); + "cannot un-register built-in error policy '%s'", name); return -1; } } diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 
10ec3bd156e846..d99a252863064d 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -345,7 +345,7 @@ Python/ast_opt.c fold_unaryop ops - Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - -Python/codecs.c - codecs_standard_error_polcies - +Python/codecs.c - codecs_builtin_error_policies - Python/codecs.c - ucnhash_capi - Python/codecs.c _PyCodec_InitRegistry methods - Python/compile.c - NO_LOCATION - From 903238a548254345f72ffc0a71d18c385a7b1dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 15:20:56 +0200 Subject: [PATCH 28/29] use 'random.getrandbits' instead of 'os.urandom' to generate random IDs --- Lib/test/test_codeccallbacks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 736a9f17785aae..1d30b03502dcaa 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -2,7 +2,7 @@ import codecs import html.entities import itertools -import os +import random import sys import unicodedata import unittest @@ -1250,7 +1250,7 @@ def custom_handler(exc): # We want a unique ID in case the test is executed multiple times # to be sure that we always try to un-register a new error policy. - unique_id = int.from_bytes(os.urandom(8)) + unique_id = random.getrandbits(128) custom_name = f'test.test_unregister_error.custom.{unique_id}' self.assertRaises(LookupError, codecs.lookup_error, custom_name) codecs.register_error(custom_name, custom_handler) @@ -1261,7 +1261,7 @@ def custom_handler(exc): def test_unregister_custom_unknown_error_policy(self): # We want a unique ID in case the test is executed multiple times # to be sure that we always try to un-register an unknown error policy. 
- unique_id = int.from_bytes(os.urandom(8)) + unique_id = random.getrandbits(128) unknown_name = f'test.test_unregister_error.custom.unknown.{unique_id}' self.assertRaises(LookupError, codecs.lookup_error, unknown_name) self.assertFalse(_codecs_unregister_error(unknown_name)) From a6c692d76bd4976276779d2fd333e876c13e4e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 17:28:50 +0200 Subject: [PATCH 29/29] address Victor's review --- Include/internal/pycore_codecs.h | 12 ++++++------ Lib/test/test_codeccallbacks.py | 23 ++++++++--------------- Modules/_codecsmodule.c | 16 +++++++++------- Modules/clinic/_codecsmodule.c.h | 15 ++++++++------- Python/codecs.c | 10 ++++++---- Tools/c-analyzer/cpython/ignored.tsv | 2 +- 6 files changed, 38 insertions(+), 40 deletions(-) diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index 802bca253962ff..4400be8b33dee7 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -22,13 +22,13 @@ extern void _PyCodec_Fini(PyInterpreterState *interp); extern PyObject* _PyCodec_Lookup(const char *encoding); /* - * Un-register the error handling callback function error under the given - * name. Only non-built-in error handlers can be un-registered. + * Un-register the error handling callback function registered under + * the given 'name'. Only custom error handlers can be un-registered. * - * - Return -1 and set an exception if 'name' is a built-in error policy, - * or if an error occurred. - * - Return 0 if no error handler is associated with the given error policy. - * - Return 1 if the error handler was successfully removed. + * - Return -1 and set an exception if 'name' refers to a built-in + * error handling name (e.g., 'strict'), or if an error occurred. + * - Return 0 if no custom error handler can be found for 'name'. + * - Return 1 if the custom error handler was successfully removed. 
*/ extern int _PyCodec_UnregisterError(const char *name); diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 1d30b03502dcaa..86e5e5c1474674 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -2,7 +2,6 @@ import codecs import html.entities import itertools -import random import sys import unicodedata import unittest @@ -1236,33 +1235,27 @@ class FakeUnicodeError(Exception): with self.assertRaises((TypeError, FakeUnicodeError)): handler(FakeUnicodeError()) - def test_reject_unregister_builtin_error_policy(self): - for policy in [ + def test_reject_unregister_builtin_error_handler(self): + for name in [ 'strict', 'ignore', 'replace', 'backslashreplace', 'namereplace', 'xmlcharrefreplace', 'surrogateescape', 'surrogatepass', ]: - with self.subTest(policy): - self.assertRaises(ValueError, _codecs_unregister_error, policy) + with self.subTest(name): + self.assertRaises(ValueError, _codecs_unregister_error, name) - def test_unregister_custom_error_policy(self): + def test_unregister_custom_error_handler(self): def custom_handler(exc): raise exc - # We want a unique ID in case the test is executed multiple times - # to be sure that we always try to un-register a new error policy. - unique_id = random.getrandbits(128) - custom_name = f'test.test_unregister_error.custom.{unique_id}' + custom_name = 'test.test_unregister_custom_error_handler' self.assertRaises(LookupError, codecs.lookup_error, custom_name) codecs.register_error(custom_name, custom_handler) self.assertIs(codecs.lookup_error(custom_name), custom_handler) self.assertTrue(_codecs_unregister_error(custom_name)) self.assertRaises(LookupError, codecs.lookup_error, custom_name) - def test_unregister_custom_unknown_error_policy(self): - # We want a unique ID in case the test is executed multiple times - # to be sure that we always try to un-register an unknown error policy. 
- unique_id = random.getrandbits(128) - unknown_name = f'test.test_unregister_error.custom.unknown.{unique_id}' + def test_unregister_custom_unknown_error_handler(self): + unknown_name = 'test.test_unregister_custom_unknown_error_handler' self.assertRaises(LookupError, codecs.lookup_error, unknown_name) self.assertFalse(_codecs_unregister_error(unknown_name)) self.assertRaises(LookupError, codecs.lookup_error, unknown_name) diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 593e4cb2f966e6..471b42badc8e8c 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -984,19 +984,21 @@ _codecs._unregister_error -> bool errors: str / -Un-register the specified error handler under the name 'errors'. +Un-register the specified error handler for the error handling `errors'. -Only custom error handlers can be un-registered. Returns True -if the handler has been successfully un-registered, and False -if the error policy is not registered. +Only custom error handlers can be un-registered. An exception is raised +if the error handling is a built-in one (e.g., 'strict'), or if an error +occurs. + +Otherwise, this returns True if a custom handler has been successfully +un-registered, and False if no custom handler for the specified error +handling exists. -An exception is raised when the error policy is a built-in one -or if an error occurred. 
[clinic start generated code]*/ static int _codecs__unregister_error_impl(PyObject *module, const char *errors) -/*[clinic end generated code: output=28c22be667465503 input=b5fb3325eea01278]*/ +/*[clinic end generated code: output=28c22be667465503 input=a63ab9e9ce1686d4]*/ { return _PyCodec_UnregisterError(errors); } diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h index 96c3655b7d82de..01855aec5e123e 100644 --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -2687,14 +2687,15 @@ PyDoc_STRVAR(_codecs__unregister_error__doc__, "_unregister_error($module, errors, /)\n" "--\n" "\n" -"Un-register the specified error handler under the name \'errors\'.\n" +"Un-register the specified error handler for the error handling `errors\'.\n" "\n" -"Only custom error handlers can be un-registered. Returns True\n" -"if the handler has been successfully un-registered, and False\n" -"if the error policy is not registered.\n" +"Only custom error handlers can be un-registered. 
An exception is raised\n" +"if the error handling is a built-in one (e.g., \'strict\'), or if an error\n" +"occurs.\n" "\n" -"An exception is raised when the error policy is a built-in one\n" -"or if an error occurred."); +"Otherwise, this returns True if a custom handler has been successfully\n" +"un-registered, and False if no custom handler for the specified error\n" +"handling exists."); #define _CODECS__UNREGISTER_ERROR_METHODDEF \ {"_unregister_error", (PyCFunction)_codecs__unregister_error, METH_O, _codecs__unregister_error__doc__}, @@ -2795,4 +2796,4 @@ _codecs_lookup_error(PyObject *module, PyObject *arg) #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=b0d112761927032f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b3013d4709d96ffe input=a9049054013a1b77]*/ diff --git a/Python/codecs.c b/Python/codecs.c index 3ee9efd588f797..68dc232bb86163 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -16,7 +16,7 @@ Copyright (c) Corporation for National Research Initiatives. 
#include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI -static const char *codecs_builtin_error_policies[] = { +static const char *codecs_builtin_error_handlers[] = { "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace", "namereplace", "surrogatepass", "surrogateescape", @@ -626,10 +626,10 @@ int PyCodec_RegisterError(const char *name, PyObject *error) int _PyCodec_UnregisterError(const char *name) { - for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_builtin_error_policies); ++i) { - if (strcmp(name, codecs_builtin_error_policies[i]) == 0) { + for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_builtin_error_handlers); ++i) { + if (strcmp(name, codecs_builtin_error_handlers[i]) == 0) { PyErr_Format(PyExc_ValueError, - "cannot un-register built-in error policy '%s'", name); + "cannot un-register built-in error handler '%s'", name); return -1; } } @@ -1490,6 +1490,8 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } } }; + // ensure that the built-in error handlers' names are kept in sync + assert(Py_ARRAY_LENGTH(methods) == Py_ARRAY_LENGTH(codecs_builtin_error_handlers)); assert(interp->codecs.initialized == 0); interp->codecs.search_path = PyList_New(0); diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index d99a252863064d..e6c599a2ac4a46 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -345,7 +345,7 @@ Python/ast_opt.c fold_unaryop ops - Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - -Python/codecs.c - codecs_builtin_error_policies - +Python/codecs.c - codecs_builtin_error_handlers - Python/codecs.c - ucnhash_capi - Python/codecs.c _PyCodec_InitRegistry methods - Python/compile.c - NO_LOCATION -