Skip to content

Commit de690f1

Browse files
picnixz authored and hartwork committed
pythongh-90949: add Expat API to prevent XML deadly allocations (CVE-2025-59375) (python#139234)
Expose the XML Expat 2.7.2 mitigation APIs to disallow use of disproportional amounts of dynamic memory from within an Expat parser (see CVE-2025-59375 for instance). The exposed APIs are available on Expat parsers, that is, parsers created by `xml.parsers.expat.ParserCreate()`, as: - `parser.SetAllocTrackerActivationThreshold(threshold)`, and - `parser.SetAllocTrackerMaximumAmplification(max_factor)`. (cherry picked from commit f04bea4)
1 parent f610f9e commit de690f1

File tree

7 files changed

+545
-28
lines changed

7 files changed

+545
-28
lines changed

Doc/library/pyexpat.rst

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,13 @@ The :mod:`xml.parsers.expat` module contains two functions:
7373
*encoding* [1]_ is given it will override the implicit or explicit encoding of the
7474
document.
7575

76+
.. _xmlparser-non-root:
77+
78+
Parsers created through :func:`!ParserCreate` are called "root" parsers,
79+
in the sense that they do not have any parent parser attached. Non-root
80+
parsers are created by :meth:`parser.ExternalEntityParserCreate
81+
<xmlparser.ExternalEntityParserCreate>`.
82+
7683
Expat can optionally do XML namespace processing for you, enabled by providing a
7784
value for *namespace_separator*. The value must be a one-character string; a
7885
:exc:`ValueError` will be raised if the string has an illegal length (``None``
@@ -232,6 +239,55 @@ XMLParser Objects
232239
.. versionadded:: 3.10.14
233240

234241

242+
:class:`!xmlparser` objects have the following methods to mitigate some
243+
common XML vulnerabilities.
244+
245+
.. method:: xmlparser.SetAllocTrackerActivationThreshold(threshold, /)
246+
247+
Sets the number of allocated bytes of dynamic memory needed to activate
248+
protection against disproportionate use of RAM.
249+
250+
By default, parser objects have an allocation activation threshold of 64 MiB,
251+
or equivalently 67,108,864 bytes.
252+
253+
An :exc:`ExpatError` is raised if this method is called on a
254+
|xml-non-root-parser| parser.
255+
The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset`
256+
should not be used as they may have no special meaning.
257+
258+
.. versionadded:: next
259+
260+
.. method:: xmlparser.SetAllocTrackerMaximumAmplification(max_factor, /)
261+
262+
Sets the maximum amplification factor between direct input and bytes
263+
of dynamic memory allocated.
264+
265+
The amplification factor is calculated as ``allocated / direct``
266+
while parsing, where ``direct`` is the number of bytes read from
267+
the primary document in parsing and ``allocated`` is the number
268+
of bytes of dynamic memory allocated in the parser hierarchy.
269+
270+
The *max_factor* value must be a non-NaN :class:`float` value greater than
271+
or equal to 1.0. Amplification factors greater than 100.0 can be observed
272+
near the start of parsing even with benign files in practice. In particular,
273+
the activation threshold should be carefully chosen to avoid false positives.
274+
275+
By default, parser objects have a maximum amplification factor of 100.0.
276+
277+
An :exc:`ExpatError` is raised if this method is called on a
278+
|xml-non-root-parser| parser or if *max_factor* is outside the valid range.
279+
The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset`
280+
should not be used as they may have no special meaning.
281+
282+
.. note::
283+
284+
The maximum amplification factor is only considered if the threshold
285+
that can be adjusted by :meth:`.SetAllocTrackerActivationThreshold` is
286+
exceeded.
287+
288+
.. versionadded:: next
289+
290+
235291
:class:`xmlparser` objects have the following attributes:
236292

237293

@@ -910,3 +966,4 @@ The ``errors`` module has the following attributes:
910966
not. See https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EncodingDecl
911967
and https://www.iana.org/assignments/character-sets/character-sets.xhtml.
912968
969+
.. |xml-non-root-parser| replace:: :ref:`non-root <xmlparser-non-root>`

Include/pyexpat.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ struct PyExpat_CAPI
5252
int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt);
5353
/* might be NULL for expat < 2.6.0 */
5454
XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled);
55+
/* might be NULL for expat < 2.7.2 */
56+
XML_Bool (*SetAllocTrackerActivationThreshold)(
57+
XML_Parser parser, unsigned long long activationThresholdBytes);
58+
XML_Bool (*SetAllocTrackerMaximumAmplification)(
59+
XML_Parser parser, float maxAmplificationFactor);
5560
/* always add new stuff to the end! */
5661
};
5762

Lib/test/test_pyexpat.py

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,21 @@
22
# handler, are obscure and unhelpful.
33

44
from io import BytesIO
5+
import abc
6+
import functools
57
import os
68
import platform
9+
import re
710
import sys
811
import sysconfig
12+
import textwrap
913
import unittest
1014
import traceback
1115

1216
from xml.parsers import expat
1317
from xml.parsers.expat import errors
1418

19+
from test.support import import_helper
1520
from test.support import sortdict
1621

1722

@@ -784,5 +789,199 @@ def start_element(name, _):
784789
self.assertEqual(started, ['doc'])
785790

786791

792+
class AttackProtectionTestBase(abc.ABC):
793+
"""
794+
Base class for testing protections against XML payloads with
795+
disproportionate amplification.
796+
797+
The protections being tested should detect and prevent attacks
798+
that leverage disproportionate amplification from small inputs.
799+
"""
800+
801+
@staticmethod
802+
def exponential_expansion_payload(*, nrows, ncols, text='.'):
803+
"""Create a billion laughs attack payload.
804+
805+
Be careful: the number of total items is pow(ncols, nrows), thereby
806+
requiring at least pow(ncols, nrows) * sizeof(text) memory!
807+
"""
808+
template = textwrap.dedent(f"""\
809+
<?xml version="1.0"?>
810+
<!DOCTYPE doc [
811+
<!ENTITY row0 "{text}">
812+
<!ELEMENT doc (#PCDATA)>
813+
{{body}}
814+
]>
815+
<doc>&row{nrows};</doc>
816+
""").rstrip()
817+
818+
body = '\n'.join(
819+
f'<!ENTITY row{i + 1} "{f"&row{i};" * ncols}">'
820+
for i in range(nrows)
821+
)
822+
body = textwrap.indent(body, ' ' * 4)
823+
return template.format(body=body)
824+
825+
def test_payload_generation(self):
826+
# self-test for exponential_expansion_payload()
827+
payload = self.exponential_expansion_payload(nrows=2, ncols=3)
828+
self.assertEqual(payload, textwrap.dedent("""\
829+
<?xml version="1.0"?>
830+
<!DOCTYPE doc [
831+
<!ENTITY row0 ".">
832+
<!ELEMENT doc (#PCDATA)>
833+
<!ENTITY row1 "&row0;&row0;&row0;">
834+
<!ENTITY row2 "&row1;&row1;&row1;">
835+
]>
836+
<doc>&row2;</doc>
837+
""").rstrip())
838+
839+
def assert_root_parser_failure(self, func, /, *args, **kwargs):
840+
"""Check that func(*args, **kwargs) is invalid for a sub-parser."""
841+
msg = "parser must be a root parser"
842+
self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs)
843+
844+
@abc.abstractmethod
845+
def assert_rejected(self, func, /, *args, **kwargs):
846+
"""Assert that func(*args, **kwargs) triggers the attack protection.
847+
848+
Note: this method must ensure that the attack protection being tested
849+
is the one that is actually triggered at runtime, e.g., by matching
850+
the exact error message.
851+
"""
852+
853+
@abc.abstractmethod
854+
def set_activation_threshold(self, parser, threshold):
855+
"""Set the activation threshold for the tested protection."""
856+
857+
@abc.abstractmethod
858+
def set_maximum_amplification(self, parser, max_factor):
859+
"""Set the maximum amplification factor for the tested protection."""
860+
861+
@abc.abstractmethod
862+
def test_set_activation_threshold__threshold_reached(self):
863+
"""Test when the activation threshold is exceeded."""
864+
865+
@abc.abstractmethod
866+
def test_set_activation_threshold__threshold_not_reached(self):
867+
"""Test when the activation threshold is not exceeded."""
868+
869+
def test_set_activation_threshold__invalid_threshold_type(self):
870+
parser = expat.ParserCreate()
871+
setter = functools.partial(self.set_activation_threshold, parser)
872+
873+
self.assertRaises(TypeError, setter, 1.0)
874+
self.assertRaises(TypeError, setter, -1.5)
875+
self.assertRaises(ValueError, setter, -5)
876+
877+
def test_set_activation_threshold__invalid_threshold_range(self):
878+
_testcapi = import_helper.import_module("_testcapi")
879+
parser = expat.ParserCreate()
880+
setter = functools.partial(self.set_activation_threshold, parser)
881+
882+
self.assertRaises(OverflowError, setter, _testcapi.ULLONG_MAX + 1)
883+
884+
def test_set_activation_threshold__fail_for_subparser(self):
885+
parser = expat.ParserCreate()
886+
subparser = parser.ExternalEntityParserCreate(None)
887+
setter = functools.partial(self.set_activation_threshold, subparser)
888+
self.assert_root_parser_failure(setter, 12345)
889+
890+
@abc.abstractmethod
891+
def test_set_maximum_amplification__amplification_exceeded(self):
892+
"""Test when the amplification factor is exceeded."""
893+
894+
@abc.abstractmethod
895+
def test_set_maximum_amplification__amplification_not_exceeded(self):
896+
"""Test when the amplification factor is not exceeded."""
897+
898+
def test_set_maximum_amplification__infinity(self):
899+
inf = float('inf') # an 'inf' threshold is allowed by Expat
900+
parser = expat.ParserCreate()
901+
self.assertIsNone(self.set_maximum_amplification(parser, inf))
902+
903+
def test_set_maximum_amplification__invalid_max_factor_type(self):
904+
parser = expat.ParserCreate()
905+
setter = functools.partial(self.set_maximum_amplification, parser)
906+
907+
self.assertRaises(TypeError, setter, None)
908+
self.assertRaises(TypeError, setter, 'abc')
909+
910+
def test_set_maximum_amplification__invalid_max_factor_range(self):
911+
parser = expat.ParserCreate()
912+
setter = functools.partial(self.set_maximum_amplification, parser)
913+
914+
msg = re.escape("'max_factor' must be at least 1.0")
915+
self.assertRaisesRegex(expat.ExpatError, msg, setter, float('nan'))
916+
self.assertRaisesRegex(expat.ExpatError, msg, setter, 0.99)
917+
918+
def test_set_maximum_amplification__fail_for_subparser(self):
919+
parser = expat.ParserCreate()
920+
subparser = parser.ExternalEntityParserCreate(None)
921+
setter = functools.partial(self.set_maximum_amplification, subparser)
922+
self.assert_root_parser_failure(setter, 123.45)
923+
924+
925+
@unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2")
926+
class MemoryProtectionTest(AttackProtectionTestBase, unittest.TestCase):
927+
928+
# NOTE: with the default Expat configuration, the billion laughs protection
929+
# may hit before the allocation limiter if exponential_expansion_payload()
930+
# is not carefully parametrized. As such, the payloads should be chosen so
931+
# that either the allocation limiter is hit before other protections are
932+
# triggered or no protection at all is triggered.
933+
934+
def assert_rejected(self, func, /, *args, **kwargs):
935+
"""Check that func(*args, **kwargs) hits the allocation limit."""
936+
msg = r"out of memory: line \d+, column \d+"
937+
self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs)
938+
939+
def set_activation_threshold(self, parser, threshold):
940+
return parser.SetAllocTrackerActivationThreshold(threshold)
941+
942+
def set_maximum_amplification(self, parser, max_factor):
943+
return parser.SetAllocTrackerMaximumAmplification(max_factor)
944+
945+
def test_set_activation_threshold__threshold_reached(self):
946+
parser = expat.ParserCreate()
947+
# Choose a threshold expected to be always reached.
948+
self.set_activation_threshold(parser, 3)
949+
# Check that the threshold is reached by choosing a small factor
950+
# and a payload whose peak amplification factor exceeds it.
951+
self.assertIsNone(self.set_maximum_amplification(parser, 1.0))
952+
payload = self.exponential_expansion_payload(ncols=10, nrows=4)
953+
self.assert_rejected(parser.Parse, payload, True)
954+
955+
def test_set_activation_threshold__threshold_not_reached(self):
956+
parser = expat.ParserCreate()
957+
# Choose a threshold expected to be never reached.
958+
self.set_activation_threshold(parser, pow(10, 5))
959+
# Check that the threshold is not reached: despite a small factor and
960+
# a payload whose peak amplification factor exceeds it, parsing succeeds.
961+
self.assertIsNone(self.set_maximum_amplification(parser, 1.0))
962+
payload = self.exponential_expansion_payload(ncols=10, nrows=4)
963+
self.assertIsNotNone(parser.Parse(payload, True))
964+
965+
def test_set_maximum_amplification__amplification_exceeded(self):
966+
parser = expat.ParserCreate()
967+
# Unconditionally enable the maximum amplification factor check.
968+
self.set_activation_threshold(parser, 0)
969+
# Choose a max amplification factor expected to always be exceeded.
970+
self.assertIsNone(self.set_maximum_amplification(parser, 1.0))
971+
# Craft a payload for which the peak amplification factor is > 1.0.
972+
payload = self.exponential_expansion_payload(ncols=1, nrows=2)
973+
self.assert_rejected(parser.Parse, payload, True)
974+
975+
def test_set_maximum_amplification__amplification_not_exceeded(self):
976+
parser = expat.ParserCreate()
977+
# Unconditionally enable the maximum amplification factor check.
978+
self.set_activation_threshold(parser, 0)
979+
# Choose a max amplification factor expected to never be exceeded.
980+
self.assertIsNone(self.set_maximum_amplification(parser, 1e4))
981+
# Craft a payload for which the peak amplification factor is < 1e4.
982+
payload = self.exponential_expansion_payload(ncols=1, nrows=2)
983+
self.assertIsNotNone(parser.Parse(payload, True))
984+
985+
787986
if __name__ == "__main__":
788987
unittest.main()
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold`
2+
and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification`
3+
to :ref:`xmlparser <xmlparser-objects>` objects to prevent use of
4+
disproportional amounts of dynamic memory from within an Expat parser.
5+
Patch by Bénédikt Tran.

0 commit comments

Comments
 (0)