From c1c23fbf29411a1c52b326874a95df1b8da65d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:55:30 +0200 Subject: [PATCH 01/32] Expose XML Expat 2.7.2 mitigation APIs --- Modules/clinic/pyexpat.c.h | 136 ++++++++++++++++++++++++++- Modules/expat/expat_external.h | 2 + Modules/expat/pyexpatns.h | 2 + Modules/pyexpat.c | 166 +++++++++++++++++++++++++++++---- 4 files changed, 285 insertions(+), 21 deletions(-) diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index 13210e3be0f747..e133df51bf44f2 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_SINGLETON() #endif +#include "pycore_long.h" // _PyLong_UnsignedLongLong_Converter() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(pyexpat_xmlparser_SetReparseDeferralEnabled__doc__, @@ -408,6 +409,131 @@ pyexpat_xmlparser_UseForeignDTD(PyObject *self, PyTypeObject *cls, PyObject *con #endif /* (XML_COMBINED_VERSION >= 19505) */ +#if (XML_COMBINED_VERSION >= 20702) + +PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, +"SetAllocTrackerMaximumAmplification($self, max_factor, /)\n" +"--\n" +"\n" +"Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated.\n" +"\n" +"By default, parsers objects have a maximum amplification factor of 100.\n" +"\n" +"The amplification factor is calculated as \"allocated / direct\" while parsing,\n" +"where \"direct\" is the number of bytes read from the primary document in parsing\n" +"and \"allocated\" is the number of bytes of dynamic memory allocated in the parser\n" +"hierarchy.\n" +"\n" +"The \'max_factor\' value must be a non-NaN floating point value greater than\n" +"or equal to 1.0. Amplifications factors greater than 100 can been observed\n" +"near the start of parsing even with benign files in practice. As such, the\n" +"upper bound must be carefully chosen so to avoid false positives."); + +#define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF \ + {"SetAllocTrackerMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__}, + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, + PyTypeObject *cls, + float max_factor); + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerMaximumAmplification(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "SetAllocTrackerMaximumAmplification", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + float max_factor; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyFloat_CheckExact(args[0])) { + max_factor = (float) (PyFloat_AS_DOUBLE(args[0])); + } + else + { + max_factor = (float) PyFloat_AsDouble(args[0]); + if (max_factor == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + return_value = pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl((xmlparseobject *)self, cls, max_factor); + +exit: + return return_value; +} + +#endif /* (XML_COMBINED_VERSION >= 20702) */ + +#if (XML_COMBINED_VERSION >= 20702) + +PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__, +"SetAllocTrackerActivationThreshold($self, threshold, /)\n" +"--\n" +"\n" +"Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM.\n" +"\n" +"By default, parsers objects have an allocation activation threshold of 64 MiB."); + +#define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF \ + {"SetAllocTrackerActivationThreshold", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerActivationThreshold), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__}, + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold); + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerActivationThreshold(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "SetAllocTrackerActivationThreshold", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + unsigned long long threshold; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_UnsignedLongLong_Converter(args[0], &threshold)) { + goto exit; + } + return_value = pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl((xmlparseobject *)self, cls, threshold); + +exit: + return return_value; +} + +#endif /* (XML_COMBINED_VERSION >= 20702) */ + PyDoc_STRVAR(pyexpat_ParserCreate__doc__, "ParserCreate($module, /, encoding=None, namespace_separator=None,\n" " intern=)\n" @@ -552,4 +678,12 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=4dbdc959c67dc2d5 input=a9049054013a1b77]*/ + +#ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF + #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF +#endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF) */ + +#ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF + #define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF +#endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF) */ +/*[clinic end generated code: output=a6e2c3f6343636ac input=a9049054013a1b77]*/ diff --git a/Modules/expat/expat_external.h b/Modules/expat/expat_external.h index 0f01a05d0e9560..f77f3fd20d6865 100644 --- a/Modules/expat/expat_external.h +++ b/Modules/expat/expat_external.h @@ -39,6 +39,8 @@ #ifndef Expat_External_INCLUDED # define Expat_External_INCLUDED 1 +/* Required so that functions in expat.h are declared */ +#include "expat_config.h" /* Namespace external symbols to allow multiple libexpat version to co-exist. */ #include "pyexpatns.h" diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h index 8ee03ef0792815..fc6b482d587e0d 100644 --- a/Modules/expat/pyexpatns.h +++ b/Modules/expat/pyexpatns.h @@ -82,6 +82,8 @@ #define XmlPrologStateInit PyExpat_XmlPrologStateInit #define XmlPrologStateInitExternalEntity PyExpat_XmlPrologStateInitExternalEntity #define XML_ResumeParser PyExpat_XML_ResumeParser +#define XML_SetAllocTrackerActivationThreshold PyExpat_XML_SetAllocTrackerActivationThreshold +#define XML_SetAllocTrackerMaximumAmplification PyExpat_XML_SetAllocTrackerMaximumAmplification #define XML_SetAttlistDeclHandler PyExpat_XML_SetAttlistDeclHandler #define XML_SetBase PyExpat_XML_SetBase #define XML_SetBillionLaughsAttackProtectionActivationThreshold PyExpat_XML_SetBillionLaughsAttackProtectionActivationThreshold diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 878368bf323dcd..36c14d6b14587a 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -7,8 +7,10 @@ #include "pycore_pyhash.h" // _Py_HashSecret #include "pycore_traceback.h" // _PyTraceback_Add() +#include // FLT_MAX #include #include // offsetof() + #include "expat.h" #include "pyexpat.h" @@ -138,31 +140,72 @@ set_error_attr(PyObject *err, const char *name, int value) return 1; } +static PyObject * +format_xml_error(enum XML_Error code, int lineno, int column) +{ + const char *errmsg = XML_ErrorString(code); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(strlen(errmsg) + 1); + if (writer == NULL) { + return NULL; + } + if (PyUnicodeWriter_Format(writer, + "%s: line %i, column %i", + errmsg, lineno, column) < 0) + { + PyUnicodeWriter_Discard(writer); + return NULL; + } + return PyUnicodeWriter_Finish(writer); +} + +static PyObject * +set_xml_error(pyexpat_state *state, + enum XML_Error code, int lineno, int column, + const char *errmsg) +{ + PyObject *arg = errmsg == NULL + ? format_xml_error(code, lineno, column) + : PyUnicode_FromStringAndSize(errmsg, strlen(errmsg)); + if (arg == NULL) { + return NULL; + } + PyObject *res = PyObject_CallOneArg(state->error, arg); + Py_DECREF(arg); + if ( + res != NULL + && set_error_attr(res, "code", code) + && set_error_attr(res, "lineno", lineno) + && set_error_attr(res, "offset", column) + ) { + PyErr_SetObject(state->error, res); + Py_DECREF(res); + } + return NULL; +} + +#define SET_XML_ERROR(STATE, SELF, CODE, ERRMSG) \ + do { \ + XML_Parser parser = SELF->itself; \ + assert(parser != NULL); \ + int lineno = XML_GetErrorLineNumber(parser); \ + int column = XML_GetErrorColumnNumber(parser); \ + (void)set_xml_error(state, CODE, lineno, column, ERRMSG); \ + } while (0) + /* Build and set an Expat exception, including positioning * information. Always returns NULL. */ static PyObject * set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code) { - PyObject *err; - PyObject *buffer; - XML_Parser parser = self->itself; - int lineno = XML_GetErrorLineNumber(parser); - int column = XML_GetErrorColumnNumber(parser); + SET_XML_ERROR(state, self, code, NULL); + return NULL; +} - buffer = PyUnicode_FromFormat("%s: line %i, column %i", - XML_ErrorString(code), lineno, column); - if (buffer == NULL) - return NULL; - err = PyObject_CallOneArg(state->error, buffer); - Py_DECREF(buffer); - if ( err != NULL - && set_error_attr(err, "code", code) - && set_error_attr(err, "offset", column) - && set_error_attr(err, "lineno", lineno)) { - PyErr_SetObject(state->error, err); - } - Py_XDECREF(err); +static PyObject * +set_invalid_arg(pyexpat_state *state, xmlparseobject *self, const char *errmsg) +{ + SET_XML_ERROR(state, self, XML_ERROR_INVALID_ARGUMENT, errmsg); return NULL; } @@ -1133,6 +1176,89 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls, } #endif +#if XML_COMBINED_VERSION >= 20702 +/*[clinic input] +@permit_long_summary +@permit_long_docstring_body +pyexpat.xmlparser.SetAllocTrackerMaximumAmplification + + cls: defining_class + max_factor: float + / + +Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated. + +By default, parsers objects have a maximum amplification factor of 100. + +The amplification factor is calculated as "allocated / direct" while parsing, +where "direct" is the number of bytes read from the primary document in parsing +and "allocated" is the number of bytes of dynamic memory allocated in the parser +hierarchy. + +The 'max_factor' value must be a non-NaN floating point value greater than +or equal to 1.0. Amplifications factors greater than 100 can been observed +near the start of parsing even with benign files in practice. As such, the +upper bound must be carefully chosen so to avoid false positives. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, + PyTypeObject *cls, + float max_factor) +/*[clinic end generated code: output=6e44bd48c9b112a0 input=18e8d07329c0efda]*/ +{ + assert(self->itself != NULL); + if (XML_SetAllocTrackerMaximumAmplification(self->itself, max_factor) == XML_TRUE) { + Py_RETURN_NONE; + } + // XML_SetAllocTrackerMaximumAmplification() can fail if self->itself + // is not a root parser (currently, this is equivalent to be created + // by ExternalEntityParserCreate()) or if 'max_factor' is NaN or < 1.0. + // + // Expat does not provide a way to determine whether a parser is a root + // or not, nor does it provide a way to distinguish between failures in + // XML_SetAllocTrackerMaximumAmplification() (see gh-90949), we manually + // detect the factor out-of-range issue here so that users have a better + // error message. + pyexpat_state *state = PyType_GetModuleState(cls); + const char *message = (isnan(max_factor) || max_factor < 1.0f) + ? "'max_factor' must be at least 1.0" + : "parser must be a root parser"; + return set_invalid_arg(state, self, message); +} + +/*[clinic input] +@permit_long_summary +@permit_long_docstring_body +pyexpat.xmlparser.SetAllocTrackerActivationThreshold + + cls: defining_class + threshold: unsigned_long_long + / + +Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM. + +By default, parsers objects have an allocation activation threshold of 64 MiB. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold) +/*[clinic end generated code: output=bed7e93207ba08c5 input=8453509a137a47c0]*/ +{ + assert(self->itself != NULL); + if (XML_SetAllocTrackerActivationThreshold(self->itself, threshold) == XML_TRUE) { + Py_RETURN_NONE; + } + // XML_SetAllocTrackerActivationThreshold() can only fail if self->itself + // is not a root parser (currently, this is equivalent to be created + // by ExternalEntityParserCreate()). + pyexpat_state *state = PyType_GetModuleState(cls); + return set_invalid_arg(state, self, "parser must be a root parser"); +} +#endif + static struct PyMethodDef xmlparse_methods[] = { PYEXPAT_XMLPARSER_PARSE_METHODDEF PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF @@ -1141,9 +1267,9 @@ static struct PyMethodDef xmlparse_methods[] = { PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF -#if XML_COMBINED_VERSION >= 19505 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF -#endif + PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF + PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF {NULL, NULL} /* sentinel */ From 12bef9ceac44b3b70ef41bf97f389d0d8291721e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 13:46:54 +0200 Subject: [PATCH 02/32] add tests --- Lib/test/test_pyexpat.py | 89 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index d4b4f60be980a5..e8590a47ce667e 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -2,9 +2,11 @@ # handler, are obscure and unhelpful. import os +import re import sys import sysconfig import unittest +import textwrap import traceback from io import BytesIO from test import support @@ -821,5 +823,92 @@ def start_element(name, _): self.assertEqual(started, ['doc']) +class AttackProtectionTest(unittest.TestCase): + + def billion_laughs(self, ncols, nrows, text='.'): + """Create a billion laugh payload. + + Be careful: the number of total items is pow(n, k), thereby + requiring at least pow(ncols, nrows) * sizeof(base) memory! + """ + body = textwrap.indent('\n'.join( + f'' + for i in range(nrows) + ), ' ') + return f"""\ + + + +{body} +]> +&row{nrows}; +""" + + def test_set_alloc_tracker_maximum_amplification(self): + payload = self.billion_laughs(10, 4) + + p = expat.ParserCreate() + # Unconditionally enable maximum amplification factor. + p.SetAllocTrackerActivationThreshold(0) + # At runtime, the peak amplification factor is 101.71, + # which is above the default threshold (100.0). + msg = re.escape("out of memory: line 3, column 15") + self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload) + + # # Re-create a parser as the current parser is now in an error state. + p = expat.ParserCreate() + # Unconditionally enable maximum amplification factor. + p.SetAllocTrackerActivationThreshold(0) + # Use a max amplification factor a bit above the actual one. + self.assertIsNone(p.SetAllocTrackerMaximumAmplification(101.72)) + self.assertIsNotNone(p.Parse(payload)) + + def test_set_alloc_tracker_maximum_amplification_invalid_args(self): + parser = expat.ParserCreate() + f = parser.SetAllocTrackerMaximumAmplification + + msg = re.escape("'max_factor' must be at least 1.0") + self.assertRaisesRegex(expat.ExpatError, msg, f, float('nan')) + self.assertRaisesRegex(expat.ExpatError, msg, f, 0.99) + + subparser = parser.ExternalEntityParserCreate(None) + fsub = subparser.SetAllocTrackerMaximumAmplification + msg = re.escape("parser must be a root parser") + self.assertRaisesRegex(expat.ExpatError, msg, fsub, 1.0) + + def test_set_alloc_tracker_activation_threshold(self): + # Run the test with EXPAT_MALLOC_DEBUG=2 to detect those constants. + MAX_ALLOC = 17333 + MIN_ALLOC = 1096 + + payload = self.billion_laughs(10, 4) + + p = expat.ParserCreate() + p.SetAllocTrackerActivationThreshold(MAX_ALLOC + 1) + self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) + # Check that we never reach the activation threshold. + self.assertIsNotNone(p.Parse(payload)) + + p = expat.ParserCreate() + p.SetAllocTrackerActivationThreshold(MIN_ALLOC - 1) + # Check that we always reach the activation threshold. + self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) + msg = re.escape("out of memory: line 3, column 10") + self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload) + + def test_set_alloc_tracker_activation_threshold_invalid_args(self): + parser = expat.ParserCreate() + f = parser.SetAllocTrackerActivationThreshold + + ULONG_LONG_MAX = 2 * sys.maxsize + 1 + self.assertRaises(OverflowError, f, ULONG_LONG_MAX + 1) + + subparser = parser.ExternalEntityParserCreate(None) + fsub = subparser.SetAllocTrackerActivationThreshold + msg = re.escape("parser must be a root parser") + self.assertRaisesRegex(expat.ExpatError, msg, fsub, 12345) + + if __name__ == "__main__": unittest.main() From 1d7e599f4006603817c3242ecf786ae5a0b7b6df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 14:36:38 +0200 Subject: [PATCH 03/32] docs --- Doc/library/pyexpat.rst | 47 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 5506ac828e5abe..ce357ee153e35a 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -231,6 +231,53 @@ XMLParser Objects .. versionadded:: 3.13 +:class:`xmlparser` objects have the following methods to mitigate some +well-known XML vulnerabilities. + +.. method:: xmlparser.SetAllocTrackerMaximumAmplification(max_factor, /) + + Sets the maximum amplification factor between direct input and bytes + of dynamic memory allocated. + + By default, parsers objects have a maximum amplification factor of 100. + + The amplification factor is calculated as ``allocated / direct`` + while parsing, where ``direct`` is the number of bytes read from + the primary document in parsing and ``allocated`` is the number + of bytes of dynamic memory allocated in the parser hierarchy. + + The *max_factor* value must be a non-NaN :class:`float` value greater than + or equal to 1.0. Amplifications factors greater than 100 can been observed + near the start of parsing even with benign files in practice. As such, the + upper bound must be carefully chosen so to avoid false positives. + + An :exc:`ExpatError` is raised if this method is called by a non-root + parser or if *max_factor* is outside the valid range. The corresponding + :attr:`~ExpatError.lineno` and :attr:`~ExpatError.column` should not be + used as they will have no special meaning. + + .. note:: + + The maximum amplification factor is only considered if the threshold + specified by :meth:`.SetAllocTrackerActivationThreshold` is reached. + + .. versionadded:: next + +.. method:: xmlparser.SetAllocTrackerActivationThreshold(threshold, /) + + Sets the number of allocated bytes of dynamic memory needed to activate + protection against disproportionate use of RAM. + + By default, parsers objects have an allocation activation threshold of 64 MiB, + or equivalently 67,108,864 bytes. + + An :exc:`ExpatError` is raised if this method is called by a non-root parser. + The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.column` + should not be used as they will have no special meaning. + + .. versionadded:: next + + :class:`xmlparser` objects have the following attributes: From 3dcd9bd3228dad609ae6395d2175d0f83fe1facd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 14:40:16 +0200 Subject: [PATCH 04/32] NEWS --- Doc/whatsnew/3.15.rst | 10 ++++++++++ .../2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst | 5 +++++ 2 files changed, 15 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index dc16c944886bd0..b8a21216d650f5 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -518,6 +518,16 @@ unittest (Contributed by Garry Cairns in :gh:`134567`.) +xml.parsers.expat +----------------- + +* Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` + and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` + to :class:`~xml.parsers.expat.xmlparser` objects to prevent use of + disproportional amounts of dynamic memory from within an Expat parser. + (Contributed by Bénédikt Tran in :gh:`90949`.) + + zlib ---- diff --git a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst new file mode 100644 index 00000000000000..74f6b78ced7e70 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst @@ -0,0 +1,5 @@ +Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` +and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` +to :class:`~xml.parsers.expat.xmlparser` objects to prevent use of +disproportional amounts of dynamic memory from within an Expat parser. Patch +by Bénédikt Tran. From 0ecbd5526c42cfa87d0f130df39ec531e59a2d6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:51:47 +0200 Subject: [PATCH 05/32] fix docs --- Doc/library/pyexpat.rst | 4 ++-- Doc/whatsnew/3.15.rst | 2 +- .../Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index ce357ee153e35a..0e66c4dd3c0e47 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -253,7 +253,7 @@ well-known XML vulnerabilities. An :exc:`ExpatError` is raised if this method is called by a non-root parser or if *max_factor* is outside the valid range. The corresponding - :attr:`~ExpatError.lineno` and :attr:`~ExpatError.column` should not be + :attr:`~.ExpatError.lineno` and :attr:`~.ExpatError.column` should not be used as they will have no special meaning. .. note:: @@ -272,7 +272,7 @@ well-known XML vulnerabilities. or equivalently 67,108,864 bytes. An :exc:`ExpatError` is raised if this method is called by a non-root parser. - The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.column` + The corresponding :attr:`~.ExpatError.lineno` and :attr:`~.ExpatError.column` should not be used as they will have no special meaning. .. versionadded:: next diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 716d149a4a0ef6..c431981eea895f 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -545,7 +545,7 @@ xml.parsers.expat * Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` - to :class:`~xml.parsers.expat.xmlparser` objects to prevent use of + to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. (Contributed by Bénédikt Tran in :gh:`90949`.) diff --git a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst index 74f6b78ced7e70..5baa3ea347065d 100644 --- a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst +++ b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst @@ -1,5 +1,5 @@ Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` -to :class:`~xml.parsers.expat.xmlparser` objects to prevent use of +to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. Patch by Bénédikt Tran. From 07445ad2726123991e64eb63951ae35400601ea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 17:04:08 +0200 Subject: [PATCH 06/32] fix tests --- Lib/test/test_pyexpat.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index e8590a47ce667e..1a53f863232ec3 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -10,7 +10,7 @@ import traceback from io import BytesIO from test import support -from test.support import os_helper +from test.support import import_helper, os_helper from test.support import sortdict from unittest import mock from xml.parsers import expat @@ -825,7 +825,7 @@ def start_element(name, _): class AttackProtectionTest(unittest.TestCase): - def billion_laughs(self, ncols, nrows, text='.'): + def billion_laughs(self, ncols, nrows, text='.', indent=' '): """Create a billion laugh payload. Be careful: the number of total items is pow(n, k), thereby @@ -834,34 +834,36 @@ def billion_laughs(self, ncols, nrows, text='.'): body = textwrap.indent('\n'.join( f'' for i in range(nrows) - ), ' ') + ), indent) return f"""\ - +{indent} +{indent} {body} ]> &row{nrows}; """ def test_set_alloc_tracker_maximum_amplification(self): - payload = self.billion_laughs(10, 4) + # On WASI, the maximum amplification factor of the payload may differ, + # so we craft a payload that is likely to yield an allocation factor + # way larger than 1.0 and way smaller than 10^5. + payload = self.billion_laughs(1, 2) p = expat.ParserCreate() # Unconditionally enable maximum amplification factor. p.SetAllocTrackerActivationThreshold(0) - # At runtime, the peak amplification factor is 101.71, - # which is above the default threshold (100.0). - msg = re.escape("out of memory: line 3, column 15") + # Use a max amplification factor likely to be below the real one. + self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) + msg = r"out of memory: line \d+, column \d+" self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload) # # Re-create a parser as the current parser is now in an error state. p = expat.ParserCreate() # Unconditionally enable maximum amplification factor. p.SetAllocTrackerActivationThreshold(0) - # Use a max amplification factor a bit above the actual one. - self.assertIsNone(p.SetAllocTrackerMaximumAmplification(101.72)) + self.assertIsNone(p.SetAllocTrackerMaximumAmplification(10_000)) self.assertIsNotNone(p.Parse(payload)) def test_set_alloc_tracker_maximum_amplification_invalid_args(self): @@ -894,20 +896,21 @@ def test_set_alloc_tracker_activation_threshold(self): p.SetAllocTrackerActivationThreshold(MIN_ALLOC - 1) # Check that we always reach the activation threshold. self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) - msg = re.escape("out of memory: line 3, column 10") + msg = r"out of memory: line \d+, column \d+" self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload) - def test_set_alloc_tracker_activation_threshold_invalid_args(self): + def test_set_alloc_tracker_activation_threshold_overflown_args(self): + _testcapi = import_helper.import_module("_testcapi") parser = expat.ParserCreate() f = parser.SetAllocTrackerActivationThreshold + self.assertRaises(OverflowError, f, _testcapi.ULLONG_MAX + 1) - ULONG_LONG_MAX = 2 * sys.maxsize + 1 - self.assertRaises(OverflowError, f, ULONG_LONG_MAX + 1) - + def test_set_alloc_tracker_activation_threshold_invalid_args(self): + parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) - fsub = subparser.SetAllocTrackerActivationThreshold + f = subparser.SetAllocTrackerActivationThreshold msg = re.escape("parser must be a root parser") - self.assertRaisesRegex(expat.ExpatError, msg, fsub, 12345) + self.assertRaisesRegex(expat.ExpatError, msg, f, 12345) if __name__ == "__main__": From 9c7371f416eaf70f7eaa2c73d8792c1dca8d5cc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 17:22:51 +0200 Subject: [PATCH 07/32] regen SBOM --- Misc/sbom.spdx.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 56bd64267f67dd..a2f8a1da194b73 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -62,11 +62,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c22196e3d8bee88fcdda715623b3b9d2119d2fb3" + "checksumValue": "2f6789397c983be0e8991c16b2cfcda119033eda" }, { "algorithm": "SHA256", - "checksumValue": "f2c2283ba03b057e92beefc7f81ba901ebb6dfc1a45b036c8a7d65808eb77a84" + "checksumValue": "6d2bf3dbc757c92b3e587ef819ebbb07771325eeee0b5ac8430bc400182db9e9" } ], "fileName": "Modules/expat/expat_external.h" From 10855847249df4dd140e0a24b441dc6c3bca3846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 18:43:48 +0200 Subject: [PATCH 08/32] remove unused include --- Modules/pyexpat.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 36c14d6b14587a..243bc51af9831c 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -7,10 +7,8 @@ #include "pycore_pyhash.h" // _Py_HashSecret #include "pycore_traceback.h" // _PyTraceback_Add() -#include // FLT_MAX #include #include // offsetof() - #include "expat.h" #include "pyexpat.h" From c10fe919a75b94e43be526b8e3fa74ea9ea6f508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 18:45:23 +0200 Subject: [PATCH 09/32] fix possible error handling --- Modules/pyexpat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 243bc51af9831c..44eb75db1f672b 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -176,8 +176,8 @@ set_xml_error(pyexpat_state *state, && set_error_attr(res, "offset", column) ) { PyErr_SetObject(state->error, res); - Py_DECREF(res); } + Py_XDECREF(res); return NULL; } From 18d175f79098b864075d6a2f06f6cc8b3d61b2f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 18:45:44 +0200 Subject: [PATCH 10/32] undef macro after usage --- Modules/pyexpat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 44eb75db1f672b..be77e86b22c518 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -207,6 +207,8 @@ set_invalid_arg(pyexpat_state *state, xmlparseobject *self, const char *errmsg) return NULL; } +#undef SET_XML_ERROR + static int have_handler(xmlparseobject *self, int type) { From 911b2b7eb9e197ef28725a8292e82bb737c8fa4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 18:48:04 +0200 Subject: [PATCH 11/32] Update Lib/test/test_pyexpat.py --- Lib/test/test_pyexpat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 1a53f863232ec3..af6944ff7bd777 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -829,7 +829,7 @@ def billion_laughs(self, ncols, nrows, text='.', indent=' '): """Create a billion laugh payload. Be careful: the number of total items is pow(n, k), thereby - requiring at least pow(ncols, nrows) * sizeof(base) memory! + requiring at least pow(ncols, nrows) * sizeof(text) memory! """ body = textwrap.indent('\n'.join( f'' From d63668539630127da9be8bad570590f9a1b4aab3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 19:12:17 +0200 Subject: [PATCH 12/32] update comments --- Lib/test/test_pyexpat.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index af6944ff7bd777..4b558b4ef67ac6 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -825,12 +825,14 @@ def start_element(name, _): class AttackProtectionTest(unittest.TestCase): - def billion_laughs(self, ncols, nrows, text='.', indent=' '): - """Create a billion laugh payload. + def exponential_expansion_payload(self, ncols, nrows, text='.'): + """Create a billion laughs attack payload. Be careful: the number of total items is pow(n, k), thereby requiring at least pow(ncols, nrows) * sizeof(text) memory! """ + # 'indent' affects the peak amplification factor and allocation + indent = ' ' * 2 body = textwrap.indent('\n'.join( f'' for i in range(nrows) @@ -847,9 +849,9 @@ def billion_laughs(self, ncols, nrows, text='.', indent=' '): def test_set_alloc_tracker_maximum_amplification(self): # On WASI, the maximum amplification factor of the payload may differ, - # so we craft a payload that is likely to yield an allocation factor + # so we craft a payload that is likely to yield an amplification factor # way larger than 1.0 and way smaller than 10^5. - payload = self.billion_laughs(1, 2) + payload = self.exponential_expansion_payload(1, 2) p = expat.ParserCreate() # Unconditionally enable maximum amplification factor. @@ -859,7 +861,7 @@ def test_set_alloc_tracker_maximum_amplification(self): msg = r"out of memory: line \d+, column \d+" self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload) - # # Re-create a parser as the current parser is now in an error state. + # Re-create a parser as the current parser is now in an error state. p = expat.ParserCreate() # Unconditionally enable maximum amplification factor. p.SetAllocTrackerActivationThreshold(0) @@ -880,11 +882,11 @@ def test_set_alloc_tracker_maximum_amplification_invalid_args(self): self.assertRaisesRegex(expat.ExpatError, msg, fsub, 1.0) def test_set_alloc_tracker_activation_threshold(self): - # Run the test with EXPAT_MALLOC_DEBUG=2 to detect those constants. + # Run the test with EXPAT_MALLOC_DEBUG=2 to find those constants. MAX_ALLOC = 17333 MIN_ALLOC = 1096 - payload = self.billion_laughs(10, 4) + payload = self.exponential_expansion_payload(10, 4) p = expat.ParserCreate() p.SetAllocTrackerActivationThreshold(MAX_ALLOC + 1) From b9510650de13c2b688e198f2df5fc0c7e0b9fca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:37:22 +0200 Subject: [PATCH 13/32] use better test names --- Lib/test/test_pyexpat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 4b558b4ef67ac6..88a994cd912109 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -868,7 +868,7 @@ def test_set_alloc_tracker_maximum_amplification(self): self.assertIsNone(p.SetAllocTrackerMaximumAmplification(10_000)) self.assertIsNotNone(p.Parse(payload)) - def test_set_alloc_tracker_maximum_amplification_invalid_args(self): + def test_set_alloc_tracker_maximum_amplification_invalid(self): parser = expat.ParserCreate() f = parser.SetAllocTrackerMaximumAmplification @@ -901,13 +901,13 @@ def test_set_alloc_tracker_activation_threshold(self): msg = r"out of memory: line \d+, column \d+" self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload) - def test_set_alloc_tracker_activation_threshold_overflown_args(self): + def test_set_alloc_tracker_activation_threshold_overflow(self): _testcapi = import_helper.import_module("_testcapi") parser = expat.ParserCreate() f = parser.SetAllocTrackerActivationThreshold self.assertRaises(OverflowError, f, _testcapi.ULLONG_MAX + 1) - def test_set_alloc_tracker_activation_threshold_invalid_args(self): + def test_set_alloc_tracker_activation_threshold_invalid(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) f = subparser.SetAllocTrackerActivationThreshold From e11bf14f1f6d84ee16812f0c022699b7f7eab2b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:37:37 +0200 Subject: [PATCH 14/32] simplify roles usage --- Doc/library/pyexpat.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 0e66c4dd3c0e47..14a34c06886a2b 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -251,10 +251,10 @@ well-known XML vulnerabilities. near the start of parsing even with benign files in practice. As such, the upper bound must be carefully chosen so to avoid false positives. - An :exc:`ExpatError` is raised if this method is called by a non-root - parser or if *max_factor* is outside the valid range. The corresponding - :attr:`~.ExpatError.lineno` and :attr:`~.ExpatError.column` should not be - used as they will have no special meaning. + An :exc:`ExpatError` is raised if this method is called on a non-root + parser or if *max_factor* is outside the valid range. + The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` + should not be used as they may have no special meaning. .. note:: @@ -271,9 +271,9 @@ well-known XML vulnerabilities. By default, parsers objects have an allocation activation threshold of 64 MiB, or equivalently 67,108,864 bytes. - An :exc:`ExpatError` is raised if this method is called by a non-root parser. - The corresponding :attr:`~.ExpatError.lineno` and :attr:`~.ExpatError.column` - should not be used as they will have no special meaning. + An :exc:`ExpatError` is raised if this method is called on a non-root parser. + The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` + should not be used as they may have no special meaning. .. versionadded:: next From 3e45613aa75b803574040faaf60e78bbdb7eb3ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:39:50 +0200 Subject: [PATCH 15/32] prevent reparse deferral of Expat to blow up --- Lib/test/test_pyexpat.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 88a994cd912109..9e4025eec8ccda 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -859,14 +859,14 @@ def test_set_alloc_tracker_maximum_amplification(self): # Use a max amplification factor likely to be below the real one. self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) msg = r"out of memory: line \d+, column \d+" - self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload) + self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload, True) # Re-create a parser as the current parser is now in an error state. p = expat.ParserCreate() # Unconditionally enable maximum amplification factor. p.SetAllocTrackerActivationThreshold(0) self.assertIsNone(p.SetAllocTrackerMaximumAmplification(10_000)) - self.assertIsNotNone(p.Parse(payload)) + self.assertIsNotNone(p.Parse(payload, True)) def test_set_alloc_tracker_maximum_amplification_invalid(self): parser = expat.ParserCreate() @@ -878,7 +878,7 @@ def test_set_alloc_tracker_maximum_amplification_invalid(self): subparser = parser.ExternalEntityParserCreate(None) fsub = subparser.SetAllocTrackerMaximumAmplification - msg = re.escape("parser must be a root parser") + msg = "parser must be a root parser" self.assertRaisesRegex(expat.ExpatError, msg, fsub, 1.0) def test_set_alloc_tracker_activation_threshold(self): @@ -892,14 +892,14 @@ def test_set_alloc_tracker_activation_threshold(self): p.SetAllocTrackerActivationThreshold(MAX_ALLOC + 1) self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) # Check that we never reach the activation threshold. - self.assertIsNotNone(p.Parse(payload)) + self.assertIsNotNone(p.Parse(payload, True)) p = expat.ParserCreate() p.SetAllocTrackerActivationThreshold(MIN_ALLOC - 1) # Check that we always reach the activation threshold. self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) msg = r"out of memory: line \d+, column \d+" - self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload) + self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload, True) def test_set_alloc_tracker_activation_threshold_overflow(self): _testcapi = import_helper.import_module("_testcapi") @@ -911,7 +911,7 @@ def test_set_alloc_tracker_activation_threshold_invalid(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) f = subparser.SetAllocTrackerActivationThreshold - msg = re.escape("parser must be a root parser") + msg = "parser must be a root parser" self.assertRaisesRegex(expat.ExpatError, msg, f, 12345) From fb83fb57c5bec75f7acc49c03463fdfdb17c7b1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:47:38 +0200 Subject: [PATCH 16/32] test better numeric values --- Lib/test/test_pyexpat.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 9e4025eec8ccda..b41f64131b808c 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -868,7 +868,12 @@ def test_set_alloc_tracker_maximum_amplification(self): self.assertIsNone(p.SetAllocTrackerMaximumAmplification(10_000)) self.assertIsNotNone(p.Parse(payload, True)) - def test_set_alloc_tracker_maximum_amplification_invalid(self): + def test_set_alloc_tracker_maximum_amplification_infty(self): + inf = float('inf') # an 'inf' threshold is allowed + parser = expat.ParserCreate() + self.assertIsNone(parser.SetAllocTrackerMaximumAmplification(inf)) + + def test_set_alloc_tracker_maximum_amplification_fail_for_subparser(self): parser = expat.ParserCreate() f = parser.SetAllocTrackerMaximumAmplification @@ -882,37 +887,42 @@ def test_set_alloc_tracker_maximum_amplification_invalid(self): self.assertRaisesRegex(expat.ExpatError, msg, fsub, 1.0) def test_set_alloc_tracker_activation_threshold(self): - # Run the test with EXPAT_MALLOC_DEBUG=2 to find those constants. - MAX_ALLOC = 17333 - MIN_ALLOC = 1096 - + # The payload is expected to have a peak allocation of + # at least 3 bytes but strictly less than 10^5 bytes. payload = self.exponential_expansion_payload(10, 4) p = expat.ParserCreate() - p.SetAllocTrackerActivationThreshold(MAX_ALLOC + 1) + p.SetAllocTrackerActivationThreshold(pow(10, 5)) self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) # Check that we never reach the activation threshold. self.assertIsNotNone(p.Parse(payload, True)) p = expat.ParserCreate() - p.SetAllocTrackerActivationThreshold(MIN_ALLOC - 1) + p.SetAllocTrackerActivationThreshold(2) # Check that we always reach the activation threshold. self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) msg = r"out of memory: line \d+, column \d+" self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload, True) - def test_set_alloc_tracker_activation_threshold_overflow(self): + def test_set_alloc_tracker_activation_threshold_arg_invalid(self): + parser = expat.ParserCreate() + f = parser.SetAllocTrackerActivationThreshold + self.assertRaises(TypeError, f, 1.0) + self.assertRaises(TypeError, f, -1.5) + self.assertRaises(ValueError, f, -5) + + def test_set_alloc_tracker_activation_threshold_arg_overflow(self): _testcapi = import_helper.import_module("_testcapi") parser = expat.ParserCreate() f = parser.SetAllocTrackerActivationThreshold self.assertRaises(OverflowError, f, _testcapi.ULLONG_MAX + 1) - def test_set_alloc_tracker_activation_threshold_invalid(self): + def test_set_alloc_tracker_activation_threshold_fail_for_subparser(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) - f = subparser.SetAllocTrackerActivationThreshold + fsub = subparser.SetAllocTrackerActivationThreshold msg = "parser must be a root parser" - self.assertRaisesRegex(expat.ExpatError, msg, f, 12345) + self.assertRaisesRegex(expat.ExpatError, msg, fsub, 12345) if __name__ == "__main__": From 7f91f2e1ba2c37ac31cce7aee89dd7fb093b210a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:50:47 +0200 Subject: [PATCH 17/32] update docs --- Doc/library/pyexpat.rst | 5 +++-- Modules/clinic/pyexpat.c.h | 4 ++-- Modules/pyexpat.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 14a34c06886a2b..dcb87d7e8b8cd9 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -249,7 +249,7 @@ well-known XML vulnerabilities. The *max_factor* value must be a non-NaN :class:`float` value greater than or equal to 1.0. Amplifications factors greater than 100 can been observed near the start of parsing even with benign files in practice. As such, the - upper bound must be carefully chosen so to avoid false positives. + activation threshold should be carefully chosen to avoid false positives. An :exc:`ExpatError` is raised if this method is called on a non-root parser or if *max_factor* is outside the valid range. @@ -259,7 +259,8 @@ well-known XML vulnerabilities. .. note:: The maximum amplification factor is only considered if the threshold - specified by :meth:`.SetAllocTrackerActivationThreshold` is reached. + that can be adjusted :meth:`.SetAllocTrackerActivationThreshold` is + exceeded. .. versionadded:: next diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index e133df51bf44f2..8bde10b6c894c2 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -427,7 +427,7 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, "The \'max_factor\' value must be a non-NaN floating point value greater than\n" "or equal to 1.0. Amplifications factors greater than 100 can been observed\n" "near the start of parsing even with benign files in practice. As such, the\n" -"upper bound must be carefully chosen so to avoid false positives."); +"activation threshold should be carefully chosen to avoid false positives."); #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF \ {"SetAllocTrackerMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__}, @@ -686,4 +686,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF #define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF) */ -/*[clinic end generated code: output=a6e2c3f6343636ac input=a9049054013a1b77]*/ +/*[clinic end generated code: output=50bbe2bfee799576 input=a9049054013a1b77]*/ diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index be77e86b22c518..774865847ee4d7 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1198,14 +1198,14 @@ hierarchy. The 'max_factor' value must be a non-NaN floating point value greater than or equal to 1.0. Amplifications factors greater than 100 can been observed near the start of parsing even with benign files in practice. As such, the -upper bound must be carefully chosen so to avoid false positives. +activation threshold should be carefully chosen to avoid false positives. [clinic start generated code]*/ static PyObject * pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, PyTypeObject *cls, float max_factor) -/*[clinic end generated code: output=6e44bd48c9b112a0 input=18e8d07329c0efda]*/ +/*[clinic end generated code: output=6e44bd48c9b112a0 input=23ca8b8f7de04462]*/ { assert(self->itself != NULL); if (XML_SetAllocTrackerMaximumAmplification(self->itself, max_factor) == XML_TRUE) { From 64af05ca6e16ce5b9ad408cb60fc134c27112524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 21:07:09 +0200 Subject: [PATCH 18/32] avoid deprecated `XML_GetError{Line,Column}Number` --- Modules/pyexpat.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 774865847ee4d7..6d1f52bb0a021f 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -122,24 +122,30 @@ CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info, setter(xml_parser, xml_handler); } +static int +set_error_code(PyObject *err, enum XML_Error code) +{ + PyObject *v = PyLong_FromLong((long)code); + int ok = v != NULL && PyObject_SetAttr(err, &_Py_ID(code), v) != -1; + Py_XDECREF(v); + return ok; +} + /* Set an integer attribute on the error object; return true on success, * false on an exception. */ static int -set_error_attr(PyObject *err, const char *name, int value) +set_error_location(PyObject *err, const char *name, XML_Size value) { - PyObject *v = PyLong_FromLong(value); - - if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) { - Py_XDECREF(v); - return 0; - } - Py_DECREF(v); - return 1; + PyObject *v = PyLong_FromSize_t((size_t)value); + int ok = v != NULL && PyObject_SetAttrString(err, name, v) != -1; + Py_XDECREF(v); + return ok; } + static PyObject * -format_xml_error(enum XML_Error code, int lineno, int column) +format_xml_error(enum XML_Error code, XML_Size lineno, XML_Size column) { const char *errmsg = XML_ErrorString(code); PyUnicodeWriter *writer = PyUnicodeWriter_Create(strlen(errmsg) + 1); @@ -147,8 +153,8 @@ format_xml_error(enum XML_Error code, int lineno, int column) return NULL; } if (PyUnicodeWriter_Format(writer, - "%s: line %i, column %i", - errmsg, lineno, column) < 0) + "%s: line %zu, column %zu", + errmsg, (size_t)lineno, (size_t)column) < 0) { PyUnicodeWriter_Discard(writer); return NULL; @@ -158,7 +164,7 @@ format_xml_error(enum XML_Error code, int lineno, int column) static PyObject * set_xml_error(pyexpat_state *state, - enum XML_Error code, int lineno, int column, + enum XML_Error code, XML_Size lineno, XML_Size column, const char *errmsg) { PyObject *arg = errmsg == NULL @@ -171,9 +177,9 @@ set_xml_error(pyexpat_state *state, Py_DECREF(arg); if ( res != NULL - && set_error_attr(res, "code", code) - && set_error_attr(res, "lineno", lineno) - && set_error_attr(res, "offset", column) + && set_error_code(res, code) + && set_error_location(res, "lineno", lineno) + && set_error_location(res, "offset", column) ) { PyErr_SetObject(state->error, res); } @@ -185,8 +191,8 @@ set_xml_error(pyexpat_state *state, do { \ XML_Parser parser = SELF->itself; \ assert(parser != NULL); \ - int lineno = XML_GetErrorLineNumber(parser); \ - int column = XML_GetErrorColumnNumber(parser); \ + XML_Size lineno = XML_GetCurrentLineNumber(parser); \ + XML_Size column = XML_GetCurrentColumnNumber(parser); \ (void)set_xml_error(state, CODE, lineno, column, ERRMSG); \ } while (0) From b01e53d80f545e14851b58d05c43de252995a879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 22 Sep 2025 21:17:43 +0200 Subject: [PATCH 19/32] raise `NotImplementedError` for unavailable mitigation APIs --- Modules/clinic/pyexpat.c.h | 18 +----------------- Modules/pyexpat.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index 8bde10b6c894c2..09faf76f0f6be4 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -409,8 +409,6 @@ pyexpat_xmlparser_UseForeignDTD(PyObject *self, PyTypeObject *cls, PyObject *con #endif /* (XML_COMBINED_VERSION >= 19505) */ -#if (XML_COMBINED_VERSION >= 20702) - PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, "SetAllocTrackerMaximumAmplification($self, max_factor, /)\n" "--\n" @@ -478,10 +476,6 @@ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification(PyObject *self, PyTypeObje return return_value; } -#endif /* (XML_COMBINED_VERSION >= 20702) */ - -#if (XML_COMBINED_VERSION >= 20702) - PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__, "SetAllocTrackerActivationThreshold($self, threshold, /)\n" "--\n" @@ -532,8 +526,6 @@ pyexpat_xmlparser_SetAllocTrackerActivationThreshold(PyObject *self, PyTypeObjec return return_value; } -#endif /* (XML_COMBINED_VERSION >= 20702) */ - PyDoc_STRVAR(pyexpat_ParserCreate__doc__, "ParserCreate($module, /, encoding=None, namespace_separator=None,\n" " intern=)\n" @@ -678,12 +670,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ - -#ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF - #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF -#endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF) */ - -#ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF - #define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF -#endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF) */ -/*[clinic end generated code: output=50bbe2bfee799576 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=585e00dcfea8b399 input=a9049054013a1b77]*/ diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 6d1f52bb0a021f..9f284f1b68eb4e 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1182,7 +1182,6 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls, } #endif -#if XML_COMBINED_VERSION >= 20702 /*[clinic input] @permit_long_summary @permit_long_docstring_body @@ -1213,6 +1212,7 @@ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, float max_factor) /*[clinic end generated code: output=6e44bd48c9b112a0 input=23ca8b8f7de04462]*/ { +#if XML_COMBINED_VERSION >= 20702 assert(self->itself != NULL); if (XML_SetAllocTrackerMaximumAmplification(self->itself, max_factor) == XML_TRUE) { Py_RETURN_NONE; @@ -1231,6 +1231,12 @@ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, ? "'max_factor' must be at least 1.0" : "parser must be a root parser"; return set_invalid_arg(state, self, message); +#else + PyErr_SetString(PyExc_NotImplementedError, + "SetAllocTrackerMaximumAmplification() requires " + "Expat 2.7.2 or later"); + return NULL; +#endif } /*[clinic input] @@ -1253,6 +1259,7 @@ pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, unsigned long long threshold) /*[clinic end generated code: output=bed7e93207ba08c5 input=8453509a137a47c0]*/ { +#if XML_COMBINED_VERSION >= 20702 assert(self->itself != NULL); if (XML_SetAllocTrackerActivationThreshold(self->itself, threshold) == XML_TRUE) { Py_RETURN_NONE; @@ -1262,8 +1269,13 @@ pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, // by ExternalEntityParserCreate()). pyexpat_state *state = PyType_GetModuleState(cls); return set_invalid_arg(state, self, "parser must be a root parser"); -} +#else + PyErr_SetString(PyExc_NotImplementedError, + "SetAllocTrackerActivationThreshold() requires " + "Expat 2.7.2 or later"); + return NULL; #endif +} static struct PyMethodDef xmlparse_methods[] = { PYEXPAT_XMLPARSER_PARSE_METHODDEF From cd040bff116ba31d230b29d1c52972656cf462da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Sep 2025 11:03:44 +0200 Subject: [PATCH 20/32] improve various wordings --- Doc/library/pyexpat.rst | 14 +++++++------- Lib/test/test_pyexpat.py | 2 +- Modules/clinic/pyexpat.c.h | 14 +++++++------- Modules/pyexpat.c | 16 ++++++++-------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index dcb87d7e8b8cd9..b79f95ee6e53b8 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -232,24 +232,24 @@ XMLParser Objects :class:`xmlparser` objects have the following methods to mitigate some -well-known XML vulnerabilities. +common XML vulnerabilities. .. method:: xmlparser.SetAllocTrackerMaximumAmplification(max_factor, /) Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated. - By default, parsers objects have a maximum amplification factor of 100. - The amplification factor is calculated as ``allocated / direct`` while parsing, where ``direct`` is the number of bytes read from the primary document in parsing and ``allocated`` is the number of bytes of dynamic memory allocated in the parser hierarchy. The *max_factor* value must be a non-NaN :class:`float` value greater than - or equal to 1.0. Amplifications factors greater than 100 can been observed - near the start of parsing even with benign files in practice. As such, the - activation threshold should be carefully chosen to avoid false positives. + or equal to 1.0. Amplification factors greater than 100.0 can be observed + near the start of parsing even with benign files in practice. In particular, + the activation threshold should be carefully chosen to avoid false positives. + + By default, parser objects have a maximum amplification factor of 100.0. An :exc:`ExpatError` is raised if this method is called on a non-root parser or if *max_factor* is outside the valid range. @@ -269,7 +269,7 @@ well-known XML vulnerabilities. Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM. - By default, parsers objects have an allocation activation threshold of 64 MiB, + By default, parser objects have an allocation activation threshold of 64 MiB, or equivalently 67,108,864 bytes. An :exc:`ExpatError` is raised if this method is called on a non-root parser. diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index b41f64131b808c..b28383464abda7 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -868,7 +868,7 @@ def test_set_alloc_tracker_maximum_amplification(self): self.assertIsNone(p.SetAllocTrackerMaximumAmplification(10_000)) self.assertIsNotNone(p.Parse(payload, True)) - def test_set_alloc_tracker_maximum_amplification_infty(self): + def test_set_alloc_tracker_maximum_amplification_infinity(self): inf = float('inf') # an 'inf' threshold is allowed parser = expat.ParserCreate() self.assertIsNone(parser.SetAllocTrackerMaximumAmplification(inf)) diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index 09faf76f0f6be4..b37a0916de84a4 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -415,17 +415,17 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, "\n" "Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated.\n" "\n" -"By default, parsers objects have a maximum amplification factor of 100.\n" -"\n" "The amplification factor is calculated as \"allocated / direct\" while parsing,\n" "where \"direct\" is the number of bytes read from the primary document in parsing\n" "and \"allocated\" is the number of bytes of dynamic memory allocated in the parser\n" "hierarchy.\n" "\n" "The \'max_factor\' value must be a non-NaN floating point value greater than\n" -"or equal to 1.0. Amplifications factors greater than 100 can been observed\n" -"near the start of parsing even with benign files in practice. As such, the\n" -"activation threshold should be carefully chosen to avoid false positives."); +"or equal to 1.0. Amplification factors greater than 100.0 can been observed\n" +"near the start of parsing even with benign files in practice. In particular,\n" +"the activation threshold should be carefully chosen to avoid false positives.\n" +"\n" +"By default, parser objects have a maximum amplification factor of 100.0."); #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF \ {"SetAllocTrackerMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__}, @@ -482,7 +482,7 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__, "\n" "Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM.\n" "\n" -"By default, parsers objects have an allocation activation threshold of 64 MiB."); +"By default, parser objects have an allocation activation threshold of 64 MiB."); #define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF \ {"SetAllocTrackerActivationThreshold", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerActivationThreshold), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__}, @@ -670,4 +670,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=585e00dcfea8b399 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=87b2ec1d055c4e22 input=a9049054013a1b77]*/ diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 9f284f1b68eb4e..a49a4fb1995dc0 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1193,24 +1193,24 @@ pyexpat.xmlparser.SetAllocTrackerMaximumAmplification Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated. -By default, parsers objects have a maximum amplification factor of 100. - The amplification factor is calculated as "allocated / direct" while parsing, where "direct" is the number of bytes read from the primary document in parsing and "allocated" is the number of bytes of dynamic memory allocated in the parser hierarchy. The 'max_factor' value must be a non-NaN floating point value greater than -or equal to 1.0. Amplifications factors greater than 100 can been observed -near the start of parsing even with benign files in practice. As such, the -activation threshold should be carefully chosen to avoid false positives. +or equal to 1.0. Amplification factors greater than 100.0 can be observed +near the start of parsing even with benign files in practice. In particular, +the activation threshold should be carefully chosen to avoid false positives. + +By default, parser objects have a maximum amplification factor of 100.0. [clinic start generated code]*/ static PyObject * pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, PyTypeObject *cls, float max_factor) -/*[clinic end generated code: output=6e44bd48c9b112a0 input=23ca8b8f7de04462]*/ +/*[clinic end generated code: output=6e44bd48c9b112a0 input=e4f48064c79bf323]*/ { #if XML_COMBINED_VERSION >= 20702 assert(self->itself != NULL); @@ -1250,14 +1250,14 @@ pyexpat.xmlparser.SetAllocTrackerActivationThreshold Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM. -By default, parsers objects have an allocation activation threshold of 64 MiB. +By default, parser objects have an allocation activation threshold of 64 MiB. [clinic start generated code]*/ static PyObject * pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, PyTypeObject *cls, unsigned long long threshold) -/*[clinic end generated code: output=bed7e93207ba08c5 input=8453509a137a47c0]*/ +/*[clinic end generated code: output=bed7e93207ba08c5 input=54182cd71ad69978]*/ { #if XML_COMBINED_VERSION >= 20702 assert(self->itself != NULL); From a09cd1545be11a0b61de404ee458bca28da97737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Sep 2025 11:07:18 +0200 Subject: [PATCH 21/32] avoid SBOM alteration --- Misc/sbom.spdx.json | 4 ++-- Modules/expat/expat_external.h | 2 -- Modules/pyexpat.c | 2 ++ 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index a2f8a1da194b73..56bd64267f67dd 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -62,11 +62,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "2f6789397c983be0e8991c16b2cfcda119033eda" + "checksumValue": "c22196e3d8bee88fcdda715623b3b9d2119d2fb3" }, { "algorithm": "SHA256", - "checksumValue": "6d2bf3dbc757c92b3e587ef819ebbb07771325eeee0b5ac8430bc400182db9e9" + "checksumValue": "f2c2283ba03b057e92beefc7f81ba901ebb6dfc1a45b036c8a7d65808eb77a84" } ], "fileName": "Modules/expat/expat_external.h" diff --git a/Modules/expat/expat_external.h b/Modules/expat/expat_external.h index f77f3fd20d6865..0f01a05d0e9560 100644 --- a/Modules/expat/expat_external.h +++ b/Modules/expat/expat_external.h @@ -39,8 +39,6 @@ #ifndef Expat_External_INCLUDED # define Expat_External_INCLUDED 1 -/* Required so that functions in expat.h are declared */ -#include "expat_config.h" /* Namespace external symbols to allow multiple libexpat version to co-exist. */ #include "pyexpatns.h" diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index a49a4fb1995dc0..67e1b4d67ce712 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -9,6 +9,8 @@ #include #include // offsetof() + +#include "expat_config.h" #include "expat.h" #include "pyexpat.h" From a3fc3b3724676fc2e645c4fd065d413078155f02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Sep 2025 11:39:01 +0200 Subject: [PATCH 22/32] regen files --- Modules/clinic/pyexpat.c.h | 4 ++-- Modules/pyexpat.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index b37a0916de84a4..5ca656a480fed4 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -421,7 +421,7 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, "hierarchy.\n" "\n" "The \'max_factor\' value must be a non-NaN floating point value greater than\n" -"or equal to 1.0. Amplification factors greater than 100.0 can been observed\n" +"or equal to 1.0. Amplification factors greater than 100.0 can be observed\n" "near the start of parsing even with benign files in practice. In particular,\n" "the activation threshold should be carefully chosen to avoid false positives.\n" "\n" @@ -670,4 +670,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=87b2ec1d055c4e22 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=920163f8a39927b5 input=a9049054013a1b77]*/ diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 67e1b4d67ce712..6f2b10b1bb0046 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1212,7 +1212,7 @@ static PyObject * pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, PyTypeObject *cls, float max_factor) -/*[clinic end generated code: output=6e44bd48c9b112a0 input=e4f48064c79bf323]*/ +/*[clinic end generated code: output=6e44bd48c9b112a0 input=3544abf9dd7ae055]*/ { #if XML_COMBINED_VERSION >= 20702 assert(self->itself != NULL); From 5afe1adbf7436fff58dd06522573cb98b0fdb3bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Sep 2025 11:39:16 +0200 Subject: [PATCH 23/32] improve test assertions --- Lib/test/test_pyexpat.py | 48 +++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index b28383464abda7..ec38419ce47335 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -847,10 +847,25 @@ def exponential_expansion_payload(self, ncols, nrows, text='.'): &row{nrows}; """ + # With the default Expat configuration, the billion laughs protection may + # hit before the allocation limiter if exponential_expansion_payload() is + # not carefully parametrized. In particular, use the following assert_*() + # methods to check the error message of the active protection. + + def assert_root_parser_failure(self, func, /, *args, **kwargs): + """Check that func(*args, **kwargs) is invalid for a sub-parser.""" + msg = "parser must be a root parser" + self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + + def assert_alloc_limit_reached(self, func, /, *args, **kwargs): + """Check that fnuc(*args, **kwargs) hits the allocation limit.""" + msg = r"out of memory: line \d+, column \d+" + self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + def test_set_alloc_tracker_maximum_amplification(self): # On WASI, the maximum amplification factor of the payload may differ, # so we craft a payload that is likely to yield an amplification factor - # way larger than 1.0 and way smaller than 10^5. + # way larger than 1.0 and way smaller than 10^4. payload = self.exponential_expansion_payload(1, 2) p = expat.ParserCreate() @@ -858,22 +873,29 @@ def test_set_alloc_tracker_maximum_amplification(self): p.SetAllocTrackerActivationThreshold(0) # Use a max amplification factor likely to be below the real one. self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) - msg = r"out of memory: line \d+, column \d+" - self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload, True) + self.assert_alloc_limit_reached(p.Parse, payload, True) # Re-create a parser as the current parser is now in an error state. p = expat.ParserCreate() # Unconditionally enable maximum amplification factor. p.SetAllocTrackerActivationThreshold(0) + # Use a max amplification factor likely to be above the real one. self.assertIsNone(p.SetAllocTrackerMaximumAmplification(10_000)) self.assertIsNotNone(p.Parse(payload, True)) def test_set_alloc_tracker_maximum_amplification_infinity(self): - inf = float('inf') # an 'inf' threshold is allowed + inf = float('inf') # an 'inf' threshold is allowed by Expat parser = expat.ParserCreate() self.assertIsNone(parser.SetAllocTrackerMaximumAmplification(inf)) - def test_set_alloc_tracker_maximum_amplification_fail_for_subparser(self): + def test_set_alloc_tracker_maximum_amplification_arg_invalid_type(self): + parser = expat.ParserCreate() + f = parser.SetAllocTrackerMaximumAmplification + + self.assertRaises(TypeError, f, None) + self.assertRaises(TypeError, f, 'abc') + + def test_set_alloc_tracker_maximum_amplification_arg_invalid_range(self): parser = expat.ParserCreate() f = parser.SetAllocTrackerMaximumAmplification @@ -881,10 +903,11 @@ def test_set_alloc_tracker_maximum_amplification_fail_for_subparser(self): self.assertRaisesRegex(expat.ExpatError, msg, f, float('nan')) self.assertRaisesRegex(expat.ExpatError, msg, f, 0.99) + def test_set_alloc_tracker_maximum_amplification_fail_for_subparser(self): + parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) fsub = subparser.SetAllocTrackerMaximumAmplification - msg = "parser must be a root parser" - self.assertRaisesRegex(expat.ExpatError, msg, fsub, 1.0) + self.assert_root_parser_failure(fsub, 123.45) def test_set_alloc_tracker_activation_threshold(self): # The payload is expected to have a peak allocation of @@ -901,17 +924,17 @@ def test_set_alloc_tracker_activation_threshold(self): p.SetAllocTrackerActivationThreshold(2) # Check that we always reach the activation threshold. self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) - msg = r"out of memory: line \d+, column \d+" - self.assertRaisesRegex(expat.ExpatError, msg, p.Parse, payload, True) + self.assert_alloc_limit_reached(p.Parse, payload, True) - def test_set_alloc_tracker_activation_threshold_arg_invalid(self): + def test_set_alloc_tracker_activation_threshold_arg_invalid_type(self): parser = expat.ParserCreate() f = parser.SetAllocTrackerActivationThreshold + self.assertRaises(TypeError, f, 1.0) self.assertRaises(TypeError, f, -1.5) self.assertRaises(ValueError, f, -5) - def test_set_alloc_tracker_activation_threshold_arg_overflow(self): + def test_set_alloc_tracker_activation_threshold_arg_invalid_range(self): _testcapi = import_helper.import_module("_testcapi") parser = expat.ParserCreate() f = parser.SetAllocTrackerActivationThreshold @@ -921,8 +944,7 @@ def test_set_alloc_tracker_activation_threshold_fail_for_subparser(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) fsub = subparser.SetAllocTrackerActivationThreshold - msg = "parser must be a root parser" - self.assertRaisesRegex(expat.ExpatError, msg, fsub, 12345) + self.assert_root_parser_failure(fsub, 12345) if __name__ == "__main__": From b6949dd992ea9943735cab6ccce4c7c0b588a839 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Sep 2025 17:20:20 +0200 Subject: [PATCH 24/32] improve test genericity --- Lib/test/test_pyexpat.py | 180 ++++++++++++++++++++++----------------- 1 file changed, 104 insertions(+), 76 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index ec38419ce47335..15ab1ce6a7e754 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -1,12 +1,12 @@ # XXX TypeErrors on calling handlers, or on bad return values from a # handler, are obscure and unhelpful. +import functools import os import re import sys import sysconfig import unittest -import textwrap import traceback from io import BytesIO from test import support @@ -823,128 +823,156 @@ def start_element(name, _): self.assertEqual(started, ['doc']) -class AttackProtectionTest(unittest.TestCase): +class AttackProtectionTestCases: + """Generic interface for testing XML Expat protections. - def exponential_expansion_payload(self, ncols, nrows, text='.'): + The protections being tested should mitigate attacks based + on Billion Laughs payloads. + """ + + @staticmethod + def exponential_expansion_payload(ncols, nrows, text='.'): """Create a billion laughs attack payload. Be careful: the number of total items is pow(n, k), thereby requiring at least pow(ncols, nrows) * sizeof(text) memory! """ - # 'indent' affects the peak amplification factor and allocation - indent = ' ' * 2 - body = textwrap.indent('\n'.join( + body = '\n'.join( f'' for i in range(nrows) - ), indent) + ) return f"""\ -{indent} + + {body} ]> -&row{nrows}; -""" - - # With the default Expat configuration, the billion laughs protection may - # hit before the allocation limiter if exponential_expansion_payload() is - # not carefully parametrized. In particular, use the following assert_*() - # methods to check the error message of the active protection. +&row{nrows};""" def assert_root_parser_failure(self, func, /, *args, **kwargs): """Check that func(*args, **kwargs) is invalid for a sub-parser.""" msg = "parser must be a root parser" self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) - def assert_alloc_limit_reached(self, func, /, *args, **kwargs): - """Check that fnuc(*args, **kwargs) hits the allocation limit.""" - msg = r"out of memory: line \d+, column \d+" - self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + def assert_active_protection(self, func, /, *args, **kwargs): + """Assert that func(*args, **kwargs) triggers the attack protection.""" + raise NotImplementedError - def test_set_alloc_tracker_maximum_amplification(self): - # On WASI, the maximum amplification factor of the payload may differ, - # so we craft a payload that is likely to yield an amplification factor - # way larger than 1.0 and way smaller than 10^4. - payload = self.exponential_expansion_payload(1, 2) + def set_activation_threshold(self, parser, threshold): + """Set the activation threshold for the tested protection.""" + raise NotImplementedError - p = expat.ParserCreate() - # Unconditionally enable maximum amplification factor. - p.SetAllocTrackerActivationThreshold(0) - # Use a max amplification factor likely to be below the real one. - self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) - self.assert_alloc_limit_reached(p.Parse, payload, True) + def set_maximum_amplification(self, parser, max_factor): + """Set the maximum amplification factor for the tested protection.""" + raise NotImplementedError - # Re-create a parser as the current parser is now in an error state. - p = expat.ParserCreate() - # Unconditionally enable maximum amplification factor. - p.SetAllocTrackerActivationThreshold(0) - # Use a max amplification factor likely to be above the real one. - self.assertIsNone(p.SetAllocTrackerMaximumAmplification(10_000)) - self.assertIsNotNone(p.Parse(payload, True)) + def test_set_maximum_amplification_reached(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to always be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + # Craft a payload for which the peak amplification factor is > 1.0. + payload = self.exponential_expansion_payload(1, 2) + self.assert_active_protection(parser.Parse, payload, True) - def test_set_alloc_tracker_maximum_amplification_infinity(self): + def test_set_maximum_amplification_ignored(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to never be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) + # Craft a payload for which the peak amplification factor is < 1e4. + payload = self.exponential_expansion_payload(1, 2) + self.assertIsNotNone(parser.Parse(payload, True)) + + def test_set_maximum_amplification_infinity(self): inf = float('inf') # an 'inf' threshold is allowed by Expat parser = expat.ParserCreate() - self.assertIsNone(parser.SetAllocTrackerMaximumAmplification(inf)) + self.assertIsNone(self.set_maximum_amplification(parser, inf)) - def test_set_alloc_tracker_maximum_amplification_arg_invalid_type(self): + def test_set_maximum_amplification_arg_invalid_type(self): parser = expat.ParserCreate() - f = parser.SetAllocTrackerMaximumAmplification + setter = functools.partial(self.set_maximum_amplification, parser) - self.assertRaises(TypeError, f, None) - self.assertRaises(TypeError, f, 'abc') + self.assertRaises(TypeError, setter, None) + self.assertRaises(TypeError, setter, 'abc') - def test_set_alloc_tracker_maximum_amplification_arg_invalid_range(self): + def test_set_maximum_amplification_arg_invalid_range(self): parser = expat.ParserCreate() - f = parser.SetAllocTrackerMaximumAmplification + setter = functools.partial(self.set_maximum_amplification, parser) msg = re.escape("'max_factor' must be at least 1.0") - self.assertRaisesRegex(expat.ExpatError, msg, f, float('nan')) - self.assertRaisesRegex(expat.ExpatError, msg, f, 0.99) + self.assertRaisesRegex(expat.ExpatError, msg, setter, float('nan')) + self.assertRaisesRegex(expat.ExpatError, msg, setter, 0.99) - def test_set_alloc_tracker_maximum_amplification_fail_for_subparser(self): + def test_set_maximum_amplification_fail_for_subparser(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) - fsub = subparser.SetAllocTrackerMaximumAmplification - self.assert_root_parser_failure(fsub, 123.45) + setter = functools.partial(self.set_maximum_amplification, subparser) + self.assert_root_parser_failure(setter, 123.45) - def test_set_alloc_tracker_activation_threshold(self): - # The payload is expected to have a peak allocation of - # at least 3 bytes but strictly less than 10^5 bytes. + def test_set_attack_protection_threshold_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be always reached. + self.set_activation_threshold(parser, 3) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) payload = self.exponential_expansion_payload(10, 4) + self.assert_active_protection(parser.Parse, payload, True) - p = expat.ParserCreate() - p.SetAllocTrackerActivationThreshold(pow(10, 5)) - self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) - # Check that we never reach the activation threshold. - self.assertIsNotNone(p.Parse(payload, True)) - - p = expat.ParserCreate() - p.SetAllocTrackerActivationThreshold(2) - # Check that we always reach the activation threshold. - self.assertIsNone(p.SetAllocTrackerMaximumAmplification(1.0)) - self.assert_alloc_limit_reached(p.Parse, payload, True) + def test_set_attack_protection_threshold_ignored(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be never reached. + self.set_activation_threshold(parser, pow(10, 5)) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(10, 4) + self.assertIsNotNone(parser.Parse(payload, True)) - def test_set_alloc_tracker_activation_threshold_arg_invalid_type(self): + def test_set_attack_protection_threshold_arg_invalid_type(self): parser = expat.ParserCreate() - f = parser.SetAllocTrackerActivationThreshold + setter = functools.partial(self.set_activation_threshold, parser) - self.assertRaises(TypeError, f, 1.0) - self.assertRaises(TypeError, f, -1.5) - self.assertRaises(ValueError, f, -5) + self.assertRaises(TypeError, setter, 1.0) + self.assertRaises(TypeError, setter, -1.5) + self.assertRaises(ValueError, setter, -5) - def test_set_alloc_tracker_activation_threshold_arg_invalid_range(self): + def test_set_attack_protection_threshold_arg_invalid_range(self): _testcapi = import_helper.import_module("_testcapi") parser = expat.ParserCreate() - f = parser.SetAllocTrackerActivationThreshold - self.assertRaises(OverflowError, f, _testcapi.ULLONG_MAX + 1) + setter = functools.partial(self.set_activation_threshold, parser) - def test_set_alloc_tracker_activation_threshold_fail_for_subparser(self): + self.assertRaises(OverflowError, setter, _testcapi.ULLONG_MAX + 1) + + def test_set_attack_protection_threshold_fail_for_subparser(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) - fsub = subparser.SetAllocTrackerActivationThreshold - self.assert_root_parser_failure(fsub, 12345) + setter = functools.partial(self.set_activation_threshold, subparser) + self.assert_root_parser_failure(setter, 12345) + + +@unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2") +class MemoryProtectionTest(AttackProtectionTestCases, unittest.TestCase): + + # With the default Expat configuration, the billion laughs protection may + # hit before the allocation limiter if exponential_expansion_payload() is + # not carefully parametrized. In particular, use the following assert_*() + # methods to check the error message of the active protection. + + def assert_active_protection(self, func, /, *args, **kwargs): + """Check that fnuc(*args, **kwargs) hits the allocation limit.""" + msg = r"out of memory: line \d+, column \d+" + self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + + def set_maximum_amplification(self, parser, max_factor): + return parser.SetAllocTrackerMaximumAmplification(max_factor) + + def set_activation_threshold(self, parser, threshold): + return parser.SetAllocTrackerActivationThreshold(threshold) if __name__ == "__main__": From bdbd3827c1f51d52f17ef1d0b2c946929dce0cbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Sep 2025 17:33:54 +0200 Subject: [PATCH 25/32] split tests even if they could end up duplicated --- Lib/test/test_pyexpat.py | 76 +++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 32 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 15ab1ce6a7e754..ed778e397963e1 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -868,24 +868,10 @@ def set_maximum_amplification(self, parser, max_factor): raise NotImplementedError def test_set_maximum_amplification_reached(self): - parser = expat.ParserCreate() - # Unconditionally enable maximum activation factor. - self.set_activation_threshold(parser, 0) - # Choose a max amplification factor expected to always be exceeded. - self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) - # Craft a payload for which the peak amplification factor is > 1.0. - payload = self.exponential_expansion_payload(1, 2) - self.assert_active_protection(parser.Parse, payload, True) + raise NotImplementedError def test_set_maximum_amplification_ignored(self): - parser = expat.ParserCreate() - # Unconditionally enable maximum activation factor. - self.set_activation_threshold(parser, 0) - # Choose a max amplification factor expected to never be exceeded. - self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) - # Craft a payload for which the peak amplification factor is < 1e4. - payload = self.exponential_expansion_payload(1, 2) - self.assertIsNotNone(parser.Parse(payload, True)) + raise NotImplementedError def test_set_maximum_amplification_infinity(self): inf = float('inf') # an 'inf' threshold is allowed by Expat @@ -914,24 +900,10 @@ def test_set_maximum_amplification_fail_for_subparser(self): self.assert_root_parser_failure(setter, 123.45) def test_set_attack_protection_threshold_reached(self): - parser = expat.ParserCreate() - # Choose a threshold expected to be always reached. - self.set_activation_threshold(parser, 3) - # Check that the threshold is reached by choosing a small factor - # and a payload whose peak amplification factor exceeds it. - self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) - payload = self.exponential_expansion_payload(10, 4) - self.assert_active_protection(parser.Parse, payload, True) + raise NotImplementedError def test_set_attack_protection_threshold_ignored(self): - parser = expat.ParserCreate() - # Choose a threshold expected to be never reached. - self.set_activation_threshold(parser, pow(10, 5)) - # Check that the threshold is reached by choosing a small factor - # and a payload whose peak amplification factor exceeds it. - self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) - payload = self.exponential_expansion_payload(10, 4) - self.assertIsNotNone(parser.Parse(payload, True)) + raise NotImplementedError def test_set_attack_protection_threshold_arg_invalid_type(self): parser = expat.ParserCreate() @@ -974,6 +946,46 @@ def set_maximum_amplification(self, parser, max_factor): def set_activation_threshold(self, parser, threshold): return parser.SetAllocTrackerActivationThreshold(threshold) + def test_set_maximum_amplification_reached(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to always be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + # Craft a payload for which the peak amplification factor is > 1.0. + payload = self.exponential_expansion_payload(1, 2) + self.assert_active_protection(parser.Parse, payload, True) + + def test_set_maximum_amplification_ignored(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to never be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) + # Craft a payload for which the peak amplification factor is < 1e4. + payload = self.exponential_expansion_payload(1, 2) + self.assertIsNotNone(parser.Parse(payload, True)) + + def test_set_attack_protection_threshold_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be always reached. + self.set_activation_threshold(parser, 3) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(10, 4) + self.assert_active_protection(parser.Parse, payload, True) + + def test_set_attack_protection_threshold_ignored(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be never reached. + self.set_activation_threshold(parser, pow(10, 5)) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(10, 4) + self.assertIsNotNone(parser.Parse(payload, True)) + if __name__ == "__main__": unittest.main() From 0c03735a64220e67a84c8335050324f334557c78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Sep 2025 18:31:18 +0200 Subject: [PATCH 26/32] raise AttributeError when method is not available and reorder interface --- Lib/test/test_pyexpat.py | 98 ++++++++++++++++---------------- Modules/clinic/pyexpat.c.h | 112 +++++++++++++++++++++---------------- Modules/pyexpat.c | 80 ++++++++++++-------------- 3 files changed, 148 insertions(+), 142 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index ed778e397963e1..bffb9c13e2b4d1 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -867,6 +867,33 @@ def set_maximum_amplification(self, parser, max_factor): """Set the maximum amplification factor for the tested protection.""" raise NotImplementedError + def test_set_attack_protection_threshold_reached(self): + raise NotImplementedError + + def test_set_attack_protection_threshold_ignored(self): + raise NotImplementedError + + def test_set_attack_protection_threshold_arg_invalid_type(self): + parser = expat.ParserCreate() + setter = functools.partial(self.set_activation_threshold, parser) + + self.assertRaises(TypeError, setter, 1.0) + self.assertRaises(TypeError, setter, -1.5) + self.assertRaises(ValueError, setter, -5) + + def test_set_attack_protection_threshold_arg_invalid_range(self): + _testcapi = import_helper.import_module("_testcapi") + parser = expat.ParserCreate() + setter = functools.partial(self.set_activation_threshold, parser) + + self.assertRaises(OverflowError, setter, _testcapi.ULLONG_MAX + 1) + + def test_set_attack_protection_threshold_fail_for_subparser(self): + parser = expat.ParserCreate() + subparser = parser.ExternalEntityParserCreate(None) + setter = functools.partial(self.set_activation_threshold, subparser) + self.assert_root_parser_failure(setter, 12345) + def test_set_maximum_amplification_reached(self): raise NotImplementedError @@ -899,33 +926,6 @@ def test_set_maximum_amplification_fail_for_subparser(self): setter = functools.partial(self.set_maximum_amplification, subparser) self.assert_root_parser_failure(setter, 123.45) - def test_set_attack_protection_threshold_reached(self): - raise NotImplementedError - - def test_set_attack_protection_threshold_ignored(self): - raise NotImplementedError - - def test_set_attack_protection_threshold_arg_invalid_type(self): - parser = expat.ParserCreate() - setter = functools.partial(self.set_activation_threshold, parser) - - self.assertRaises(TypeError, setter, 1.0) - self.assertRaises(TypeError, setter, -1.5) - self.assertRaises(ValueError, setter, -5) - - def test_set_attack_protection_threshold_arg_invalid_range(self): - _testcapi = import_helper.import_module("_testcapi") - parser = expat.ParserCreate() - setter = functools.partial(self.set_activation_threshold, parser) - - self.assertRaises(OverflowError, setter, _testcapi.ULLONG_MAX + 1) - - def test_set_attack_protection_threshold_fail_for_subparser(self): - parser = expat.ParserCreate() - subparser = parser.ExternalEntityParserCreate(None) - setter = functools.partial(self.set_activation_threshold, subparser) - self.assert_root_parser_failure(setter, 12345) - @unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2") class MemoryProtectionTest(AttackProtectionTestCases, unittest.TestCase): @@ -940,31 +940,11 @@ def assert_active_protection(self, func, /, *args, **kwargs): msg = r"out of memory: line \d+, column \d+" self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) - def set_maximum_amplification(self, parser, max_factor): - return parser.SetAllocTrackerMaximumAmplification(max_factor) - def set_activation_threshold(self, parser, threshold): return parser.SetAllocTrackerActivationThreshold(threshold) - def test_set_maximum_amplification_reached(self): - parser = expat.ParserCreate() - # Unconditionally enable maximum activation factor. - self.set_activation_threshold(parser, 0) - # Choose a max amplification factor expected to always be exceeded. - self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) - # Craft a payload for which the peak amplification factor is > 1.0. - payload = self.exponential_expansion_payload(1, 2) - self.assert_active_protection(parser.Parse, payload, True) - - def test_set_maximum_amplification_ignored(self): - parser = expat.ParserCreate() - # Unconditionally enable maximum activation factor. - self.set_activation_threshold(parser, 0) - # Choose a max amplification factor expected to never be exceeded. - self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) - # Craft a payload for which the peak amplification factor is < 1e4. - payload = self.exponential_expansion_payload(1, 2) - self.assertIsNotNone(parser.Parse(payload, True)) + def set_maximum_amplification(self, parser, max_factor): + return parser.SetAllocTrackerMaximumAmplification(max_factor) def test_set_attack_protection_threshold_reached(self): parser = expat.ParserCreate() @@ -986,6 +966,26 @@ def test_set_attack_protection_threshold_ignored(self): payload = self.exponential_expansion_payload(10, 4) self.assertIsNotNone(parser.Parse(payload, True)) + def test_set_maximum_amplification_reached(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to always be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + # Craft a payload for which the peak amplification factor is > 1.0. + payload = self.exponential_expansion_payload(1, 2) + self.assert_active_protection(parser.Parse, payload, True) + + def test_set_maximum_amplification_ignored(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to never be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) + # Craft a payload for which the peak amplification factor is < 1e4. + payload = self.exponential_expansion_payload(1, 2) + self.assertIsNotNone(parser.Parse(payload, True)) + if __name__ == "__main__": unittest.main() diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index 5ca656a480fed4..e178547060446e 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -409,34 +409,26 @@ pyexpat_xmlparser_UseForeignDTD(PyObject *self, PyTypeObject *cls, PyObject *con #endif /* (XML_COMBINED_VERSION >= 19505) */ -PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, -"SetAllocTrackerMaximumAmplification($self, max_factor, /)\n" +#if (XML_COMBINED_VERSION >= 20702) + +PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__, +"SetAllocTrackerActivationThreshold($self, threshold, /)\n" "--\n" "\n" -"Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated.\n" -"\n" -"The amplification factor is calculated as \"allocated / direct\" while parsing,\n" -"where \"direct\" is the number of bytes read from the primary document in parsing\n" -"and \"allocated\" is the number of bytes of dynamic memory allocated in the parser\n" -"hierarchy.\n" -"\n" -"The \'max_factor\' value must be a non-NaN floating point value greater than\n" -"or equal to 1.0. Amplification factors greater than 100.0 can be observed\n" -"near the start of parsing even with benign files in practice. In particular,\n" -"the activation threshold should be carefully chosen to avoid false positives.\n" +"Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM.\n" "\n" -"By default, parser objects have a maximum amplification factor of 100.0."); +"By default, parser objects have an allocation activation threshold of 64 MiB."); -#define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF \ - {"SetAllocTrackerMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__}, +#define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF \ + {"SetAllocTrackerActivationThreshold", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerActivationThreshold), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__}, static PyObject * -pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, - PyTypeObject *cls, - float max_factor); +pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold); static PyObject * -pyexpat_xmlparser_SetAllocTrackerMaximumAmplification(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +pyexpat_xmlparser_SetAllocTrackerActivationThreshold(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -448,52 +440,59 @@ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification(PyObject *self, PyTypeObje static const char * const _keywords[] = {"", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "SetAllocTrackerMaximumAmplification", + .fname = "SetAllocTrackerActivationThreshold", .kwtuple = KWTUPLE, }; #undef KWTUPLE PyObject *argsbuf[1]; - float max_factor; + unsigned long long threshold; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!args) { goto exit; } - if (PyFloat_CheckExact(args[0])) { - max_factor = (float) (PyFloat_AS_DOUBLE(args[0])); - } - else - { - max_factor = (float) PyFloat_AsDouble(args[0]); - if (max_factor == -1.0 && PyErr_Occurred()) { - goto exit; - } + if (!_PyLong_UnsignedLongLong_Converter(args[0], &threshold)) { + goto exit; } - return_value = pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl((xmlparseobject *)self, cls, max_factor); + return_value = pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl((xmlparseobject *)self, cls, threshold); exit: return return_value; } -PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__, -"SetAllocTrackerActivationThreshold($self, threshold, /)\n" +#endif /* (XML_COMBINED_VERSION >= 20702) */ + +#if (XML_COMBINED_VERSION >= 20702) + +PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, +"SetAllocTrackerMaximumAmplification($self, max_factor, /)\n" "--\n" "\n" -"Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM.\n" +"Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated.\n" "\n" -"By default, parser objects have an allocation activation threshold of 64 MiB."); +"The amplification factor is calculated as \"allocated / direct\" while parsing,\n" +"where \"direct\" is the number of bytes read from the primary document in parsing\n" +"and \"allocated\" is the number of bytes of dynamic memory allocated in the parser\n" +"hierarchy.\n" +"\n" +"The \'max_factor\' value must be a non-NaN floating point value greater than\n" +"or equal to 1.0. Amplification factors greater than 100.0 can be observed\n" +"near the start of parsing even with benign files in practice. In particular,\n" +"the activation threshold should be carefully chosen to avoid false positives.\n" +"\n" +"By default, parser objects have a maximum amplification factor of 100.0."); -#define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF \ - {"SetAllocTrackerActivationThreshold", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerActivationThreshold), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__}, +#define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF \ + {"SetAllocTrackerMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__}, static PyObject * -pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, - PyTypeObject *cls, - unsigned long long threshold); +pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, + PyTypeObject *cls, + float max_factor); static PyObject * -pyexpat_xmlparser_SetAllocTrackerActivationThreshold(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +pyexpat_xmlparser_SetAllocTrackerMaximumAmplification(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -505,27 +504,36 @@ pyexpat_xmlparser_SetAllocTrackerActivationThreshold(PyObject *self, PyTypeObjec static const char * const _keywords[] = {"", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "SetAllocTrackerActivationThreshold", + .fname = "SetAllocTrackerMaximumAmplification", .kwtuple = KWTUPLE, }; #undef KWTUPLE PyObject *argsbuf[1]; - unsigned long long threshold; + float max_factor; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!args) { goto exit; } - if (!_PyLong_UnsignedLongLong_Converter(args[0], &threshold)) { - goto exit; + if (PyFloat_CheckExact(args[0])) { + max_factor = (float) (PyFloat_AS_DOUBLE(args[0])); } - return_value = pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl((xmlparseobject *)self, cls, threshold); + else + { + max_factor = (float) PyFloat_AsDouble(args[0]); + if (max_factor == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + return_value = pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl((xmlparseobject *)self, cls, max_factor); exit: return return_value; } +#endif /* (XML_COMBINED_VERSION >= 20702) */ + PyDoc_STRVAR(pyexpat_ParserCreate__doc__, "ParserCreate($module, /, encoding=None, namespace_separator=None,\n" " intern=)\n" @@ -670,4 +678,12 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=920163f8a39927b5 input=a9049054013a1b77]*/ + +#ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF + #define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF +#endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF) */ + +#ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF + #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF +#endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF) */ +/*[clinic end generated code: output=e73935658c04c83e input=a9049054013a1b77]*/ diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 6f2b10b1bb0046..a19d125311bb77 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1184,6 +1184,40 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls, } #endif +#if XML_COMBINED_VERSION >= 20702 +/*[clinic input] +@permit_long_summary +@permit_long_docstring_body +pyexpat.xmlparser.SetAllocTrackerActivationThreshold + + cls: defining_class + threshold: unsigned_long_long + / + +Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM. + +By default, parser objects have an allocation activation threshold of 64 MiB. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold) +/*[clinic end generated code: output=bed7e93207ba08c5 input=54182cd71ad69978]*/ +{ + assert(self->itself != NULL); + if (XML_SetAllocTrackerActivationThreshold(self->itself, threshold) == XML_TRUE) { + Py_RETURN_NONE; + } + // XML_SetAllocTrackerActivationThreshold() can only fail if self->itself + // is not a root parser (currently, this is equivalent to be created + // by ExternalEntityParserCreate()). + pyexpat_state *state = PyType_GetModuleState(cls); + return set_invalid_arg(state, self, "parser must be a root parser"); +} +#endif + +#if XML_COMBINED_VERSION >= 20702 /*[clinic input] @permit_long_summary @permit_long_docstring_body @@ -1214,7 +1248,6 @@ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, float max_factor) /*[clinic end generated code: output=6e44bd48c9b112a0 input=3544abf9dd7ae055]*/ { -#if XML_COMBINED_VERSION >= 20702 assert(self->itself != NULL); if (XML_SetAllocTrackerMaximumAmplification(self->itself, max_factor) == XML_TRUE) { Py_RETURN_NONE; @@ -1233,51 +1266,8 @@ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, ? "'max_factor' must be at least 1.0" : "parser must be a root parser"; return set_invalid_arg(state, self, message); -#else - PyErr_SetString(PyExc_NotImplementedError, - "SetAllocTrackerMaximumAmplification() requires " - "Expat 2.7.2 or later"); - return NULL; -#endif } - -/*[clinic input] -@permit_long_summary -@permit_long_docstring_body -pyexpat.xmlparser.SetAllocTrackerActivationThreshold - - cls: defining_class - threshold: unsigned_long_long - / - -Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM. - -By default, parser objects have an allocation activation threshold of 64 MiB. -[clinic start generated code]*/ - -static PyObject * -pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, - PyTypeObject *cls, - unsigned long long threshold) -/*[clinic end generated code: output=bed7e93207ba08c5 input=54182cd71ad69978]*/ -{ -#if XML_COMBINED_VERSION >= 20702 - assert(self->itself != NULL); - if (XML_SetAllocTrackerActivationThreshold(self->itself, threshold) == XML_TRUE) { - Py_RETURN_NONE; - } - // XML_SetAllocTrackerActivationThreshold() can only fail if self->itself - // is not a root parser (currently, this is equivalent to be created - // by ExternalEntityParserCreate()). - pyexpat_state *state = PyType_GetModuleState(cls); - return set_invalid_arg(state, self, "parser must be a root parser"); -#else - PyErr_SetString(PyExc_NotImplementedError, - "SetAllocTrackerActivationThreshold() requires " - "Expat 2.7.2 or later"); - return NULL; #endif -} static struct PyMethodDef xmlparse_methods[] = { PYEXPAT_XMLPARSER_PARSE_METHODDEF @@ -1288,8 +1278,8 @@ static struct PyMethodDef xmlparse_methods[] = { PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF - PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF + PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF {NULL, NULL} /* sentinel */ From 220f3e2885e87b77230a875045029be37465725f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Sep 2025 18:34:21 +0200 Subject: [PATCH 27/32] reoder docs --- Doc/library/pyexpat.rst | 28 +++++++++---------- Doc/whatsnew/3.15.rst | 4 +-- ...5-09-22-14-40-11.gh-issue-90949.UM35nb.rst | 4 +-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index b79f95ee6e53b8..b515a0287fca71 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -234,6 +234,20 @@ XMLParser Objects :class:`xmlparser` objects have the following methods to mitigate some common XML vulnerabilities. +.. method:: xmlparser.SetAllocTrackerActivationThreshold(threshold, /) + + Sets the number of allocated bytes of dynamic memory needed to activate + protection against disproportionate use of RAM. + + By default, parser objects have an allocation activation threshold of 64 MiB, + or equivalently 67,108,864 bytes. + + An :exc:`ExpatError` is raised if this method is called on a non-root parser. + The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` + should not be used as they may have no special meaning. + + .. versionadded:: next + .. method:: xmlparser.SetAllocTrackerMaximumAmplification(max_factor, /) Sets the maximum amplification factor between direct input and bytes @@ -264,20 +278,6 @@ common XML vulnerabilities. .. versionadded:: next -.. method:: xmlparser.SetAllocTrackerActivationThreshold(threshold, /) - - Sets the number of allocated bytes of dynamic memory needed to activate - protection against disproportionate use of RAM. - - By default, parser objects have an allocation activation threshold of 64 MiB, - or equivalently 67,108,864 bytes. - - An :exc:`ExpatError` is raised if this method is called on a non-root parser. - The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` - should not be used as they may have no special meaning. - - .. versionadded:: next - :class:`xmlparser` objects have the following attributes: diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index c431981eea895f..ce27ab2eff1a1a 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -543,8 +543,8 @@ unittest xml.parsers.expat ----------------- -* Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` - and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` +* Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` + and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. (Contributed by Bénédikt Tran in :gh:`90949`.) diff --git a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst index 5baa3ea347065d..eb715ebc0b02a0 100644 --- a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst +++ b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst @@ -1,5 +1,5 @@ -Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` -and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` +Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` +and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. Patch by Bénédikt Tran. From 9d538e416d962d6649c69c5797b5fb343e2c8237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 24 Sep 2025 11:27:23 +0200 Subject: [PATCH 28/32] address review --- Lib/test/test_pyexpat.py | 83 +++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index bffb9c13e2b4d1..99bfe0b782ec6d 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -1,6 +1,7 @@ # XXX TypeErrors on calling handlers, or on bad return values from a # handler, are obscure and unhelpful. +import abc import functools import os import re @@ -823,11 +824,13 @@ def start_element(name, _): self.assertEqual(started, ['doc']) -class AttackProtectionTestCases: - """Generic interface for testing XML Expat protections. +class AttackProtectionTestBase(abc.ABC): + """ + Base class for testing protections against XML payloads with + disproportionate amplification. - The protections being tested should mitigate attacks based - on Billion Laughs payloads. + The protections being tested should detect and prevent attacks + that leverage disproportionate amplification from small inputs. """ @staticmethod @@ -855,25 +858,32 @@ def assert_root_parser_failure(self, func, /, *args, **kwargs): msg = "parser must be a root parser" self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) - def assert_active_protection(self, func, /, *args, **kwargs): - """Assert that func(*args, **kwargs) triggers the attack protection.""" - raise NotImplementedError + @abc.abstractmethod + def assert_rejected(self, func, /, *args, **kwargs): + """Assert that func(*args, **kwargs) triggers the attack protection. + + Note: this method must ensure that the attack protection being tested + is the one that is actually triggered at runtime, e.g., by matching + the exact error message. + """ + @abc.abstractmethod def set_activation_threshold(self, parser, threshold): """Set the activation threshold for the tested protection.""" - raise NotImplementedError + @abc.abstractmethod def set_maximum_amplification(self, parser, max_factor): """Set the maximum amplification factor for the tested protection.""" - raise NotImplementedError - def test_set_attack_protection_threshold_reached(self): - raise NotImplementedError + @abc.abstractmethod + def test_set_activation_threshold__threshold_reached(self): + """Test when the activation threshold is exceeded.""" - def test_set_attack_protection_threshold_ignored(self): - raise NotImplementedError + @abc.abstractmethod + def test_set_activation_threshold__threshold_not_reached(self): + """Test when the activation threshold is not exceeded.""" - def test_set_attack_protection_threshold_arg_invalid_type(self): + def test_set_activation_threshold__invalid_threshold_type(self): parser = expat.ParserCreate() setter = functools.partial(self.set_activation_threshold, parser) @@ -881,38 +891,40 @@ def test_set_attack_protection_threshold_arg_invalid_type(self): self.assertRaises(TypeError, setter, -1.5) self.assertRaises(ValueError, setter, -5) - def test_set_attack_protection_threshold_arg_invalid_range(self): + def test_set_activation_threshold__invalid_threshold_range(self): _testcapi = import_helper.import_module("_testcapi") parser = expat.ParserCreate() setter = functools.partial(self.set_activation_threshold, parser) self.assertRaises(OverflowError, setter, _testcapi.ULLONG_MAX + 1) - def test_set_attack_protection_threshold_fail_for_subparser(self): + def test_set_activation_threshold__fail_for_subparser(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) setter = functools.partial(self.set_activation_threshold, subparser) self.assert_root_parser_failure(setter, 12345) - def test_set_maximum_amplification_reached(self): - raise NotImplementedError + @abc.abstractmethod + def test_set_maximum_amplification__amplification_exceeded(self): + """Test when the amplification factor is exceeded.""" - def test_set_maximum_amplification_ignored(self): - raise NotImplementedError + @abc.abstractmethod + def test_set_maximum_amplification__amplification_not_exceeded(self): + """Test when the amplification factor is not exceeded.""" - def test_set_maximum_amplification_infinity(self): + def test_set_maximum_amplification__infinity(self): inf = float('inf') # an 'inf' threshold is allowed by Expat parser = expat.ParserCreate() self.assertIsNone(self.set_maximum_amplification(parser, inf)) - def test_set_maximum_amplification_arg_invalid_type(self): + def test_set_maximum_amplification__invalid_max_factor_type(self): parser = expat.ParserCreate() setter = functools.partial(self.set_maximum_amplification, parser) self.assertRaises(TypeError, setter, None) self.assertRaises(TypeError, setter, 'abc') - def test_set_maximum_amplification_arg_invalid_range(self): + def test_set_maximum_amplification__invalid_max_factor_range(self): parser = expat.ParserCreate() setter = functools.partial(self.set_maximum_amplification, parser) @@ -920,7 +932,7 @@ def test_set_maximum_amplification_arg_invalid_range(self): self.assertRaisesRegex(expat.ExpatError, msg, setter, float('nan')) self.assertRaisesRegex(expat.ExpatError, msg, setter, 0.99) - def test_set_maximum_amplification_fail_for_subparser(self): + def test_set_maximum_amplification__fail_for_subparser(self): parser = expat.ParserCreate() subparser = parser.ExternalEntityParserCreate(None) setter = functools.partial(self.set_maximum_amplification, subparser) @@ -928,15 +940,16 @@ def test_set_maximum_amplification_fail_for_subparser(self): @unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2") -class MemoryProtectionTest(AttackProtectionTestCases, unittest.TestCase): +class MemoryProtectionTest(AttackProtectionTestBase, unittest.TestCase): # With the default Expat configuration, the billion laughs protection may # hit before the allocation limiter if exponential_expansion_payload() is - # not carefully parametrized. In particular, use the following assert_*() - # methods to check the error message of the active protection. + # not carefully parametrized. As such, the payloads should be chosen so + # that either the allocation limiter is hit before other protections are + # triggered or no protection at all is triggered. - def assert_active_protection(self, func, /, *args, **kwargs): - """Check that fnuc(*args, **kwargs) hits the allocation limit.""" + def assert_rejected(self, func, /, *args, **kwargs): + """Check that func(*args, **kwargs) hits the allocation limit.""" msg = r"out of memory: line \d+, column \d+" self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) @@ -946,7 +959,7 @@ def set_activation_threshold(self, parser, threshold): def set_maximum_amplification(self, parser, max_factor): return parser.SetAllocTrackerMaximumAmplification(max_factor) - def test_set_attack_protection_threshold_reached(self): + def test_set_activation_threshold__threshold_reached(self): parser = expat.ParserCreate() # Choose a threshold expected to be always reached. self.set_activation_threshold(parser, 3) @@ -954,9 +967,9 @@ def test_set_attack_protection_threshold_reached(self): # and a payload whose peak amplification factor exceeds it. self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) payload = self.exponential_expansion_payload(10, 4) - self.assert_active_protection(parser.Parse, payload, True) + self.assert_rejected(parser.Parse, payload, True) - def test_set_attack_protection_threshold_ignored(self): + def test_set_activation_threshold__threshold_not_reached(self): parser = expat.ParserCreate() # Choose a threshold expected to be never reached. self.set_activation_threshold(parser, pow(10, 5)) @@ -966,7 +979,7 @@ def test_set_attack_protection_threshold_ignored(self): payload = self.exponential_expansion_payload(10, 4) self.assertIsNotNone(parser.Parse(payload, True)) - def test_set_maximum_amplification_reached(self): + def test_set_maximum_amplification__amplification_exceeded(self): parser = expat.ParserCreate() # Unconditionally enable maximum activation factor. self.set_activation_threshold(parser, 0) @@ -974,9 +987,9 @@ def test_set_maximum_amplification_reached(self): self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) # Craft a payload for which the peak amplification factor is > 1.0. payload = self.exponential_expansion_payload(1, 2) - self.assert_active_protection(parser.Parse, payload, True) + self.assert_rejected(parser.Parse, payload, True) - def test_set_maximum_amplification_ignored(self): + def test_set_maximum_amplification__amplification_not_exceeded(self): parser = expat.ParserCreate() # Unconditionally enable maximum activation factor. self.set_activation_threshold(parser, 0) From 209f30027dbd7da72c325d37f243b0756f4baf24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 24 Sep 2025 11:42:27 +0200 Subject: [PATCH 29/32] update capsule API --- Include/pyexpat.h | 5 +++++ Modules/pyexpat.c | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/Include/pyexpat.h b/Include/pyexpat.h index 9824d099c3df7d..04548b7684a2fd 100644 --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -52,6 +52,11 @@ struct PyExpat_CAPI int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt); /* might be NULL for expat < 2.6.0 */ XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled); + /* might be NULL for expat < 2.7.2 */ + XML_Bool (*SetAllocTrackerActivationThreshold)( + XML_Parser parser, unsigned long long activationThresholdBytes); + XML_Bool (*SetAllocTrackerMaximumAmplification)( + XML_Parser parser, float maxAmplificationFactor); /* always add new stuff to the end! */ }; diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index a19d125311bb77..50db77c4f12c16 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -2285,6 +2285,13 @@ pyexpat_exec(PyObject *mod) #else capi->SetReparseDeferralEnabled = NULL; #endif +#if XML_COMBINED_VERSION >= 20702 + capi->SetAllocTrackerActivationThreshold = XML_SetAllocTrackerActivationThreshold; + capi->SetAllocTrackerMaximumAmplification = XML_SetAllocTrackerMaximumAmplification; +#else + capi->SetAllocTrackerActivationThreshold = NULL; + capi->SetAllocTrackerMaximumAmplification = NULL; +#endif /* export using capsule */ PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME, From 8407cfcd298553f68e6dd29e1e74ff37d03ecea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 24 Sep 2025 12:07:21 +0200 Subject: [PATCH 30/32] amend docs --- Doc/library/pyexpat.rst | 5 +++++ Doc/whatsnew/3.15.rst | 2 +- .../Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst | 6 +++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index b515a0287fca71..15527e522b7297 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -120,6 +120,11 @@ The :mod:`xml.parsers.expat` module contains two functions: XMLParser Objects ----------------- +.. class:: xmlparser + + The type of an Expat XML parser created by :func:`ParserCreate`. + + :class:`xmlparser` objects have the following methods: diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index ce27ab2eff1a1a..f3cba203a6a075 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -545,7 +545,7 @@ xml.parsers.expat * Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` - to :ref:`xmlparser ` objects to prevent use of + to :class:`~xml.parsers.expat.xmlparser` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. (Contributed by Bénédikt Tran in :gh:`90949`.) diff --git a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst index eb715ebc0b02a0..3b55c446ea3dfe 100644 --- a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst +++ b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst @@ -1,5 +1,5 @@ Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` -to :ref:`xmlparser ` objects to prevent use of -disproportional amounts of dynamic memory from within an Expat parser. Patch -by Bénédikt Tran. +to :class:`~xml.parsers.expat.xmlparser` objects to prevent use of +disproportional amounts of dynamic memory from within an Expat parser. +Patch by Bénédikt Tran. From e89881162987adfe49098334f9446af62e4d9804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 24 Sep 2025 16:55:06 +0200 Subject: [PATCH 31/32] use "NOTE:" --- Lib/test/test_pyexpat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 99bfe0b782ec6d..03a68320983913 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -942,9 +942,9 @@ def test_set_maximum_amplification__fail_for_subparser(self): @unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2") class MemoryProtectionTest(AttackProtectionTestBase, unittest.TestCase): - # With the default Expat configuration, the billion laughs protection may - # hit before the allocation limiter if exponential_expansion_payload() is - # not carefully parametrized. As such, the payloads should be chosen so + # NOTE: with the default Expat configuration, the billion laughs protection + # may hit before the allocation limiter if exponential_expansion_payload() + # is not carefully parametrized. As such, the payloads should be chosen so # that either the allocation limiter is hit before other protections are # triggered or no protection at all is triggered. From ce8cb488a94f5e7aa6e28f93416cab5e385b648d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 26 Sep 2025 15:40:06 +0200 Subject: [PATCH 32/32] address final comments --- Doc/library/pyexpat.rst | 22 +++++---- Doc/whatsnew/3.15.rst | 2 +- Lib/test/test_pyexpat.py | 45 +++++++++++++------ ...5-09-22-14-40-11.gh-issue-90949.UM35nb.rst | 2 +- 4 files changed, 47 insertions(+), 24 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 15527e522b7297..4c21bc875217b9 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -72,6 +72,13 @@ The :mod:`xml.parsers.expat` module contains two functions: *encoding* [1]_ is given it will override the implicit or explicit encoding of the document. + .. _xmlparser-non-root: + + Parsers created through :func:`!ParserCreate` are called "root" parsers, + in the sense that they do not have any parent parser attached. Non-root + parsers are created by :meth:`parser.ExternalEntityParserCreate + `. + Expat can optionally do XML namespace processing for you, enabled by providing a value for *namespace_separator*. The value must be a one-character string; a :exc:`ValueError` will be raised if the string has an illegal length (``None`` @@ -120,11 +127,6 @@ The :mod:`xml.parsers.expat` module contains two functions: XMLParser Objects ----------------- -.. class:: xmlparser - - The type of an Expat XML parser created by :func:`ParserCreate`. - - :class:`xmlparser` objects have the following methods: @@ -236,7 +238,7 @@ XMLParser Objects .. versionadded:: 3.13 -:class:`xmlparser` objects have the following methods to mitigate some +:class:`!xmlparser` objects have the following methods to mitigate some common XML vulnerabilities. .. method:: xmlparser.SetAllocTrackerActivationThreshold(threshold, /) @@ -247,7 +249,8 @@ common XML vulnerabilities. By default, parser objects have an allocation activation threshold of 64 MiB, or equivalently 67,108,864 bytes. - An :exc:`ExpatError` is raised if this method is called on a non-root parser. + An :exc:`ExpatError` is raised if this method is called on a + |xml-non-root-parser| parser. The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` should not be used as they may have no special meaning. @@ -270,8 +273,8 @@ common XML vulnerabilities. By default, parser objects have a maximum amplification factor of 100.0. - An :exc:`ExpatError` is raised if this method is called on a non-root - parser or if *max_factor* is outside the valid range. + An :exc:`ExpatError` is raised if this method is called on a + |xml-non-root-parser| parser or if *max_factor* is outside the valid range. The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` should not be used as they may have no special meaning. @@ -1007,3 +1010,4 @@ The ``errors`` module has the following attributes: not. See https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EncodingDecl and https://www.iana.org/assignments/character-sets/character-sets.xhtml. +.. |xml-non-root-parser| replace:: :ref:`non-root ` diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index f3cba203a6a075..ce27ab2eff1a1a 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -545,7 +545,7 @@ xml.parsers.expat * Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` - to :class:`~xml.parsers.expat.xmlparser` objects to prevent use of + to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. (Contributed by Bénédikt Tran in :gh:`90949`.) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 03a68320983913..9cf9ac2f613b6e 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -7,6 +7,7 @@ import re import sys import sysconfig +import textwrap import unittest import traceback from io import BytesIO @@ -834,24 +835,42 @@ class AttackProtectionTestBase(abc.ABC): """ @staticmethod - def exponential_expansion_payload(ncols, nrows, text='.'): + def exponential_expansion_payload(*, nrows, ncols, text='.'): """Create a billion laughs attack payload. Be careful: the number of total items is pow(n, k), thereby requiring at least pow(ncols, nrows) * sizeof(text) memory! """ + template = textwrap.dedent(f"""\ + + + + {{body}} + ]> + &row{nrows}; + """).rstrip() + body = '\n'.join( f'' for i in range(nrows) ) - return f"""\ - - - -{body} -]> -&row{nrows};""" + body = textwrap.indent(body, ' ' * 4) + return template.format(body=body) + + def test_payload_generation(self): + # self-test for exponential_expansion_payload() + payload = self.exponential_expansion_payload(nrows=2, ncols=3) + self.assertEqual(payload, textwrap.dedent("""\ + + + + + + ]> + &row2; + """).rstrip()) def assert_root_parser_failure(self, func, /, *args, **kwargs): """Check that func(*args, **kwargs) is invalid for a sub-parser.""" @@ -966,7 +985,7 @@ def test_set_activation_threshold__threshold_reached(self): # Check that the threshold is reached by choosing a small factor # and a payload whose peak amplification factor exceeds it. self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) - payload = self.exponential_expansion_payload(10, 4) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) self.assert_rejected(parser.Parse, payload, True) def test_set_activation_threshold__threshold_not_reached(self): @@ -976,7 +995,7 @@ def test_set_activation_threshold__threshold_not_reached(self): # Check that the threshold is reached by choosing a small factor # and a payload whose peak amplification factor exceeds it. self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) - payload = self.exponential_expansion_payload(10, 4) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) self.assertIsNotNone(parser.Parse(payload, True)) def test_set_maximum_amplification__amplification_exceeded(self): @@ -986,7 +1005,7 @@ def test_set_maximum_amplification__amplification_exceeded(self): # Choose a max amplification factor expected to always be exceeded. self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) # Craft a payload for which the peak amplification factor is > 1.0. - payload = self.exponential_expansion_payload(1, 2) + payload = self.exponential_expansion_payload(ncols=1, nrows=2) self.assert_rejected(parser.Parse, payload, True) def test_set_maximum_amplification__amplification_not_exceeded(self): @@ -996,7 +1015,7 @@ def test_set_maximum_amplification__amplification_not_exceeded(self): # Choose a max amplification factor expected to never be exceeded. self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) # Craft a payload for which the peak amplification factor is < 1e4. - payload = self.exponential_expansion_payload(1, 2) + payload = self.exponential_expansion_payload(ncols=1, nrows=2) self.assertIsNotNone(parser.Parse(payload, True)) diff --git a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst index 3b55c446ea3dfe..0719d4353fb708 100644 --- a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst +++ b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst @@ -1,5 +1,5 @@ Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` -to :class:`~xml.parsers.expat.xmlparser` objects to prevent use of +to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. Patch by Bénédikt Tran.