From 8fa6bd199c5d082f8bd88fb760d116f595a8d1b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 26 Sep 2025 18:15:19 +0200 Subject: [PATCH 1/3] simplify XML exception creation --- Modules/pyexpat.c | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 50db77c4f12c16..6e04db4c635c99 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -125,7 +125,7 @@ CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info, } static int -set_error_code(PyObject *err, enum XML_Error code) +set_xml_error_attr_code(PyObject *err, enum XML_Error code) { PyObject *v = PyLong_FromLong((long)code); int ok = v != NULL && PyObject_SetAttr(err, &_Py_ID(code), v) != -1; @@ -137,7 +137,7 @@ set_error_code(PyObject *err, enum XML_Error code) * false on an exception. */ static int -set_error_location(PyObject *err, const char *name, XML_Size value) +set_xml_error_attr_location(PyObject *err, const char *name, XML_Size value) { PyObject *v = PyLong_FromSize_t((size_t)value); int ok = v != NULL && PyObject_SetAttrString(err, name, v) != -1; @@ -146,32 +146,22 @@ set_error_location(PyObject *err, const char *name, XML_Size value) } -static PyObject * -format_xml_error(enum XML_Error code, XML_Size lineno, XML_Size column) -{ - const char *errmsg = XML_ErrorString(code); - PyUnicodeWriter *writer = PyUnicodeWriter_Create(strlen(errmsg) + 1); - if (writer == NULL) { - return NULL; - } - if (PyUnicodeWriter_Format(writer, - "%s: line %zu, column %zu", - errmsg, (size_t)lineno, (size_t)column) < 0) - { - PyUnicodeWriter_Discard(writer); - return NULL; - } - return PyUnicodeWriter_Finish(writer); -} - static PyObject * set_xml_error(pyexpat_state *state, enum XML_Error code, XML_Size lineno, XML_Size column, const char *errmsg) { - PyObject *arg = errmsg == NULL - ? format_xml_error(code, lineno, column) - : PyUnicode_FromStringAndSize(errmsg, strlen(errmsg)); + PyObject *arg; + if (errmsg == NULL) { + arg = PyUnicode_FromFormat( + "%s: line %zu, column %zu", + XML_ErrorString(code), + (size_t)lineno, (size_t)column + ); + } + else { + arg = PyUnicode_FromStringAndSize(errmsg, strlen(errmsg)); + } if (arg == NULL) { return NULL; } @@ -179,9 +169,9 @@ set_xml_error(pyexpat_state *state, Py_DECREF(arg); if ( res != NULL - && set_error_code(res, code) - && set_error_location(res, "lineno", lineno) - && set_error_location(res, "offset", column) + && set_xml_error_attr_code(res, code) + && set_xml_error_attr_location(res, "lineno", lineno) + && set_xml_error_attr_location(res, "offset", column) ) { PyErr_SetObject(state->error, res); } From 52402c34f7666f656f0bd223e6802a91ba8bcf7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 26 Sep 2025 18:21:08 +0200 Subject: [PATCH 2/3] make some changes for the next PR --- Doc/library/pyexpat.rst | 4 +- Doc/whatsnew/3.15.rst | 4 +- ...5-09-22-14-40-11.gh-issue-90949.UM35nb.rst | 4 +- Modules/pyexpat.c | 79 ++++++++++++------- 4 files changed, 58 insertions(+), 33 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 4c21bc875217b9..0b08a9b0dedef6 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -281,8 +281,8 @@ common XML vulnerabilities. .. note:: The maximum amplification factor is only considered if the threshold - that can be adjusted :meth:`.SetAllocTrackerActivationThreshold` is - exceeded. + that can be adjusted by :meth:`.SetAllocTrackerActivationThreshold` + is exceeded. .. versionadded:: next diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 3a395c197021d1..fc5519286e2264 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -556,8 +556,8 @@ unittest xml.parsers.expat ----------------- -* Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` - and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` +* Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` + and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. (Contributed by Bénédikt Tran in :gh:`90949`.) diff --git a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst index 0719d4353fb708..5611f33fb8e37b 100644 --- a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst +++ b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst @@ -1,5 +1,5 @@ -Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` -and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` +Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` +and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. Patch by Bénédikt Tran. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 6e04db4c635c99..3711b0fe7b4cad 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1174,6 +1174,50 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls, } #endif +#if XML_COMBINED_VERSION >= 20702 +static PyObject * +set_activation_threshold(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold, + XML_Bool (*setter)(XML_Parser, unsigned long long)) +{ + assert(self->itself != NULL); + if (setter(self->itself, threshold) == XML_TRUE) { + Py_RETURN_NONE; + } + // The setter fails if self->itself is NULL (which is not possible here) + // or is a non-root parser, which currently only happens for parsers + // created by ExternalEntityParserCreate(). + pyexpat_state *state = PyType_GetModuleState(cls); + return set_invalid_arg(state, self, "parser must be a root parser"); +} + +static PyObject * +set_maximum_amplification(xmlparseobject *self, + PyTypeObject *cls, + float max_factor, + XML_Bool (*setter)(XML_Parser, float)) +{ + assert(self->itself != NULL); + if (setter(self->itself, max_factor) == XML_TRUE) { + Py_RETURN_NONE; + } + // The setter fails if self->itself is NULL (which is not possible here), + // is a non-root parser, which currently only happens for parsers created + // by ExternalEntityParserCreate(), or if 'max_factor' is NaN or < 1.0. + pyexpat_state *state = PyType_GetModuleState(cls); + // Note: Expat has no API to determine whether a parser is a root parser, + // and since the Expat functions for defining the various maximum allowed + // amplifcation factors fail when a bad parser or a out-of-range factor + // is given without specifying which check failed, we check whether the + // factor is out-of-range to improve the error message. See also gh-90949. + const char *message = (isnan(max_factor) || max_factor < 1.0f) + ? "'max_factor' must be at least 1.0" + : "parser must be a root parser"; + return set_invalid_arg(state, self, message); +} +#endif + #if XML_COMBINED_VERSION >= 20702 /*[clinic input] @permit_long_summary @@ -1195,15 +1239,10 @@ pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, unsigned long long threshold) /*[clinic end generated code: output=bed7e93207ba08c5 input=54182cd71ad69978]*/ { - assert(self->itself != NULL); - if (XML_SetAllocTrackerActivationThreshold(self->itself, threshold) == XML_TRUE) { - Py_RETURN_NONE; - } - // XML_SetAllocTrackerActivationThreshold() can only fail if self->itself - // is not a root parser (currently, this is equivalent to be created - // by ExternalEntityParserCreate()). - pyexpat_state *state = PyType_GetModuleState(cls); - return set_invalid_arg(state, self, "parser must be a root parser"); + return set_activation_threshold( + self, cls, threshold, + XML_SetAllocTrackerActivationThreshold + ); } #endif @@ -1238,24 +1277,10 @@ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, float max_factor) /*[clinic end generated code: output=6e44bd48c9b112a0 input=3544abf9dd7ae055]*/ { - assert(self->itself != NULL); - if (XML_SetAllocTrackerMaximumAmplification(self->itself, max_factor) == XML_TRUE) { - Py_RETURN_NONE; - } - // XML_SetAllocTrackerMaximumAmplification() can fail if self->itself - // is not a root parser (currently, this is equivalent to be created - // by ExternalEntityParserCreate()) or if 'max_factor' is NaN or < 1.0. - // - // Expat does not provide a way to determine whether a parser is a root - // or not, nor does it provide a way to distinguish between failures in - // XML_SetAllocTrackerMaximumAmplification() (see gh-90949), we manually - // detect the factor out-of-range issue here so that users have a better - // error message. - pyexpat_state *state = PyType_GetModuleState(cls); - const char *message = (isnan(max_factor) || max_factor < 1.0f) - ? "'max_factor' must be at least 1.0" - : "parser must be a root parser"; - return set_invalid_arg(state, self, message); + return set_maximum_amplification( + self, cls, max_factor, + XML_SetAllocTrackerMaximumAmplification + ); } #endif From ba8651ee9b95a04bd676f5e8e6cae0991eecd2b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 26 Sep 2025 18:35:18 +0200 Subject: [PATCH 3/3] fix typo --- Modules/pyexpat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 3711b0fe7b4cad..a59e565efb00ed 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1208,7 +1208,7 @@ set_maximum_amplification(xmlparseobject *self, pyexpat_state *state = PyType_GetModuleState(cls); // Note: Expat has no API to determine whether a parser is a root parser, // and since the Expat functions for defining the various maximum allowed - // amplifcation factors fail when a bad parser or a out-of-range factor + // amplifcation factors fail when a bad parser or an out-of-range factor // is given without specifying which check failed, we check whether the // factor is out-of-range to improve the error message. See also gh-90949. const char *message = (isnan(max_factor) || max_factor < 1.0f)