Skip to content

Commit 5d4e891

Browse files
[3.12] pythongh-126033: fix UAF in xml.etree.ElementTree.Element.remove when concurrent mutations happen (pythonGH-126124) (python#131930)
pythongh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (pythonGH-126124) (cherry picked from commit bab1398) Co-authored-by: Bénédikt Tran <[email protected]>
1 parent f1689b6 commit 5d4e891

File tree

3 files changed

+213
-36
lines changed

3 files changed

+213
-36
lines changed

Lib/test/test_xml_etree.py

Lines changed: 178 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
import textwrap
1919
import types
2020
import unittest
21+
import unittest.mock as mock
2122
import warnings
2223
import weakref
2324

25+
from contextlib import nullcontext
2426
from functools import partial
2527
from itertools import product, islice
2628
from test import support
@@ -121,6 +123,21 @@
121123
</foo>
122124
"""
123125

126+
def is_python_implementation():
127+
assert ET is not None, "ET must be initialized"
128+
assert pyET is not None, "pyET must be initialized"
129+
return ET is pyET
130+
131+
132+
def equal_wrapper(cls):
133+
"""Mock cls.__eq__ to check whether it has been called or not.
134+
135+
The behaviour of cls.__eq__ (side-effects included) is left as is.
136+
"""
137+
eq = cls.__eq__
138+
return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq)
139+
140+
124141
def checkwarnings(*filters, quiet=False):
125142
def decorator(test):
126143
def newtest(*args, **kwargs):
@@ -2562,6 +2579,7 @@ def test_pickle_issue18997(self):
25622579

25632580

25642581
class BadElementTest(ElementTestCase, unittest.TestCase):
2582+
25652583
def test_extend_mutable_list(self):
25662584
class X:
25672585
@property
@@ -2600,18 +2618,168 @@ class Y(X, ET.Element):
26002618
e = ET.Element('foo')
26012619
e.extend(L)
26022620

2603-
def test_remove_with_mutating(self):
2604-
class X(ET.Element):
2621+
def test_remove_with_clear_assume_missing(self):
2622+
# gh-126033: Check that a concurrent clear() for an assumed-to-be
2623+
# missing element does not make the interpreter crash.
2624+
self.do_test_remove_with_clear(raises=True)
2625+
2626+
def test_remove_with_clear_assume_existing(self):
2627+
# gh-126033: Check that a concurrent clear() for an assumed-to-be
2628+
# existing element does not make the interpreter crash.
2629+
self.do_test_remove_with_clear(raises=False)
2630+
2631+
def do_test_remove_with_clear(self, *, raises):
2632+
2633+
# Until the discrepency between "del root[:]" and "root.clear()" is
2634+
# resolved, we need to keep two tests. Previously, using "del root[:]"
2635+
# did not crash with the reproducer of gh-126033 while "root.clear()"
2636+
# did.
2637+
2638+
class E(ET.Element):
2639+
"""Local class to be able to mock E.__eq__ for introspection."""
2640+
2641+
class X(E):
26052642
def __eq__(self, o):
2606-
del e[:]
2607-
return False
2608-
e = ET.Element('foo')
2609-
e.extend([X('bar')])
2610-
self.assertRaises(ValueError, e.remove, ET.Element('baz'))
2643+
del root[:]
2644+
return not raises
26112645

2612-
e = ET.Element('foo')
2613-
e.extend([ET.Element('bar')])
2614-
self.assertRaises(ValueError, e.remove, X('baz'))
2646+
class Y(E):
2647+
def __eq__(self, o):
2648+
root.clear()
2649+
return not raises
2650+
2651+
if raises:
2652+
get_checker_context = lambda: self.assertRaises(ValueError)
2653+
else:
2654+
get_checker_context = nullcontext
2655+
2656+
self.assertIs(E.__eq__, object.__eq__)
2657+
2658+
for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]:
2659+
self.enterContext(self.subTest(side_effect=side_effect))
2660+
2661+
# test removing R() from [U()]
2662+
for R, U, description in [
2663+
(E, Z, "remove missing E() from [Z()]"),
2664+
(Z, E, "remove missing Z() from [E()]"),
2665+
(Z, Z, "remove missing Z() from [Z()]"),
2666+
]:
2667+
with self.subTest(description):
2668+
root = E('top')
2669+
root.extend([U('one')])
2670+
with get_checker_context():
2671+
root.remove(R('missing'))
2672+
2673+
# test removing R() from [U(), V()]
2674+
cases = self.cases_for_remove_missing_with_mutations(E, Z)
2675+
for R, U, V, description in cases:
2676+
with self.subTest(description):
2677+
root = E('top')
2678+
root.extend([U('one'), V('two')])
2679+
with get_checker_context():
2680+
root.remove(R('missing'))
2681+
2682+
# Test removing root[0] from [Z()].
2683+
#
2684+
# Since we call root.remove() with root[0], Z.__eq__()
2685+
# will not be called (we branch on the fast Py_EQ path).
2686+
with self.subTest("remove root[0] from [Z()]"):
2687+
root = E('top')
2688+
root.append(Z('rem'))
2689+
with equal_wrapper(E) as f, equal_wrapper(Z) as g:
2690+
root.remove(root[0])
2691+
f.assert_not_called()
2692+
g.assert_not_called()
2693+
2694+
# Test removing root[1] (of type R) from [U(), R()].
2695+
is_special = is_python_implementation() and raises and Z is Y
2696+
if is_python_implementation() and raises and Z is Y:
2697+
# In pure Python, using root.clear() sets the children
2698+
# list to [] without calling list.clear().
2699+
#
2700+
# For this reason, the call to root.remove() first
2701+
# checks root[0] and sets the children list to []
2702+
# since either root[0] or root[1] is an evil element.
2703+
#
2704+
# Since checking root[1] still uses the old reference
2705+
# to the children list, PyObject_RichCompareBool() branches
2706+
# to the fast Py_EQ path and Y.__eq__() is called exactly
2707+
# once (when checking root[0]).
2708+
continue
2709+
else:
2710+
cases = self.cases_for_remove_existing_with_mutations(E, Z)
2711+
for R, U, description in cases:
2712+
with self.subTest(description):
2713+
root = E('top')
2714+
root.extend([U('one'), R('rem')])
2715+
with get_checker_context():
2716+
root.remove(root[1])
2717+
2718+
def test_remove_with_mutate_root_assume_missing(self):
2719+
# gh-126033: Check that a concurrent mutation for an assumed-to-be
2720+
# missing element does not make the interpreter crash.
2721+
self.do_test_remove_with_mutate_root(raises=True)
2722+
2723+
def test_remove_with_mutate_root_assume_existing(self):
2724+
# gh-126033: Check that a concurrent mutation for an assumed-to-be
2725+
# existing element does not make the interpreter crash.
2726+
self.do_test_remove_with_mutate_root(raises=False)
2727+
2728+
def do_test_remove_with_mutate_root(self, *, raises):
2729+
E = ET.Element
2730+
2731+
class Z(E):
2732+
def __eq__(self, o):
2733+
del root[0]
2734+
return not raises
2735+
2736+
if raises:
2737+
get_checker_context = lambda: self.assertRaises(ValueError)
2738+
else:
2739+
get_checker_context = nullcontext
2740+
2741+
# test removing R() from [U(), V()]
2742+
cases = self.cases_for_remove_missing_with_mutations(E, Z)
2743+
for R, U, V, description in cases:
2744+
with self.subTest(description):
2745+
root = E('top')
2746+
root.extend([U('one'), V('two')])
2747+
with get_checker_context():
2748+
root.remove(R('missing'))
2749+
2750+
# test removing root[1] (of type R) from [U(), R()]
2751+
cases = self.cases_for_remove_existing_with_mutations(E, Z)
2752+
for R, U, description in cases:
2753+
with self.subTest(description):
2754+
root = E('top')
2755+
root.extend([U('one'), R('rem')])
2756+
with get_checker_context():
2757+
root.remove(root[1])
2758+
2759+
def cases_for_remove_missing_with_mutations(self, E, Z):
2760+
# Cases for removing R() from [U(), V()].
2761+
# The case U = V = R = E is not interesting as there is no mutation.
2762+
for U, V in [(E, Z), (Z, E), (Z, Z)]:
2763+
description = (f"remove missing {E.__name__}() from "
2764+
f"[{U.__name__}(), {V.__name__}()]")
2765+
yield E, U, V, description
2766+
2767+
for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]:
2768+
description = (f"remove missing {Z.__name__}() from "
2769+
f"[{U.__name__}(), {V.__name__}()]")
2770+
yield Z, U, V, description
2771+
2772+
def cases_for_remove_existing_with_mutations(self, E, Z):
2773+
# Cases for removing root[1] (of type R) from [U(), R()].
2774+
# The case U = R = E is not interesting as there is no mutation.
2775+
for U, R, description in [
2776+
(E, Z, "remove root[1] from [E(), Z()]"),
2777+
(Z, E, "remove root[1] from [Z(), E()]"),
2778+
(Z, Z, "remove root[1] from [Z(), Z()]"),
2779+
]:
2780+
description = (f"remove root[1] (of type {R.__name__}) "
2781+
f"from [{U.__name__}(), {R.__name__}()]")
2782+
yield R, U, description
26152783

26162784
@support.infinite_recursion(25)
26172785
def test_recursive_repr(self):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove
2+
<xml.etree.ElementTree.Element.remove>` when the element is
3+
concurrently mutated. Patch by Bénédikt Tran.

Modules/_elementtree.c

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
845845
if (element_resize(element, self->extra->length) < 0)
846846
goto error;
847847

848+
// TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
848849
for (i = 0; i < self->extra->length; i++) {
849850
PyObject* child = deepcopy(st, self->extra->children[i], memo);
850851
if (!child || !Element_Check(st, child)) {
@@ -1627,42 +1628,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
16271628
/*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/
16281629
{
16291630
Py_ssize_t i;
1630-
int rc;
1631-
PyObject *found;
1632-
1633-
if (!self->extra) {
1634-
/* element has no children, so raise exception */
1635-
PyErr_SetString(
1636-
PyExc_ValueError,
1637-
"list.remove(x): x not in list"
1638-
);
1639-
return NULL;
1640-
}
1641-
1642-
for (i = 0; i < self->extra->length; i++) {
1643-
if (self->extra->children[i] == subelement)
1644-
break;
1645-
rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1646-
if (rc > 0)
1631+
// When iterating over the list of children, we need to check that the
1632+
// list is not cleared (self->extra != NULL) and that we are still within
1633+
// the correct bounds (i < self->extra->length).
1634+
//
1635+
// We deliberately avoid protecting against children lists that grow
1636+
// faster than the index since list objects do not protect against it.
1637+
int rc = 0;
1638+
for (i = 0; self->extra && i < self->extra->length; i++) {
1639+
if (self->extra->children[i] == subelement) {
1640+
rc = 1;
16471641
break;
1648-
if (rc < 0)
1642+
}
1643+
PyObject *child = Py_NewRef(self->extra->children[i]);
1644+
rc = PyObject_RichCompareBool(child, subelement, Py_EQ);
1645+
Py_DECREF(child);
1646+
if (rc < 0) {
16491647
return NULL;
1648+
}
1649+
else if (rc > 0) {
1650+
break;
1651+
}
16501652
}
16511653

1652-
if (i >= self->extra->length) {
1653-
/* subelement is not in children, so raise exception */
1654-
PyErr_SetString(
1655-
PyExc_ValueError,
1656-
"list.remove(x): x not in list"
1657-
);
1654+
if (rc == 0) {
1655+
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
16581656
return NULL;
16591657
}
16601658

1661-
found = self->extra->children[i];
1659+
// An extra check must be done if the mutation occurs at the very last
1660+
// step and removes or clears the 'extra' list (the condition on the
1661+
// length would not be satisfied any more).
1662+
if (self->extra == NULL || i >= self->extra->length) {
1663+
Py_RETURN_NONE;
1664+
}
1665+
1666+
PyObject *found = self->extra->children[i];
16621667

16631668
self->extra->length--;
1664-
for (; i < self->extra->length; i++)
1669+
for (; i < self->extra->length; i++) {
16651670
self->extra->children[i] = self->extra->children[i+1];
1671+
}
16661672

16671673
Py_DECREF(found);
16681674
Py_RETURN_NONE;

0 commit comments

Comments
 (0)