Skip to content

Commit 4e450d1

Browse files
hartworkdanigm
authored andcommitted
pythongh-139400: Make sure that parent parsers outlive their subparsers in pyexpat (python#139403)
* Modules/pyexpat.c: Disallow collection of in-use parent parsers. Within libexpat, a parser created via `XML_ExternalEntityParserCreate` is relying on its parent parser throughout its entire lifetime. Prior to this fix, is was possible for the parent parser to be garbage-collected too early.
1 parent e4f4a89 commit 4e450d1

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

Lib/test/test_pyexpat.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,42 @@ def resolve_entity(context, base, system_id, public_id):
771771
self.assertEqual(handler_call_args, [("bar", "baz")])
772772

773773

774+
class ParentParserLifetimeTest(unittest.TestCase):
775+
"""
776+
Subparsers make use of their parent XML_Parser inside of Expat.
777+
As a result, parent parsers need to outlive subparsers.
778+
779+
See https://github.com/python/cpython/issues/139400.
780+
"""
781+
782+
def test_parent_parser_outlives_its_subparsers__single(self):
783+
parser = expat.ParserCreate()
784+
subparser = parser.ExternalEntityParserCreate(None)
785+
786+
# Now try to cause garbage collection of the parent parser
787+
# while it's still being referenced by a related subparser.
788+
del parser
789+
790+
def test_parent_parser_outlives_its_subparsers__multiple(self):
791+
parser = expat.ParserCreate()
792+
subparser_one = parser.ExternalEntityParserCreate(None)
793+
subparser_two = parser.ExternalEntityParserCreate(None)
794+
795+
# Now try to cause garbage collection of the parent parser
796+
# while it's still being referenced by a related subparser.
797+
del parser
798+
799+
def test_parent_parser_outlives_its_subparsers__chain(self):
800+
parser = expat.ParserCreate()
801+
subparser = parser.ExternalEntityParserCreate(None)
802+
subsubparser = subparser.ExternalEntityParserCreate(None)
803+
804+
# Now try to cause garbage collection of the parent parsers
805+
# while they are still being referenced by a related subparser.
806+
del parser
807+
del subparser
808+
809+
774810
class ReparseDeferralTest(unittest.TestCase):
775811
def test_getter_setter_round_trip(self):
776812
parser = expat.ParserCreate()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
2+
garbage-collected once they are no longer referenced by subparsers created
3+
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
4+
Patch by Sebastian Pipping.

Modules/pyexpat.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,15 @@ typedef struct {
7676
PyObject_HEAD
7777

7878
XML_Parser itself;
79+
/*
80+
* Strong reference to a parent `xmlparseobject` if this parser
81+
* is a child parser. Set to NULL if this parser is a root parser.
82+
* This is needed to keep the parent parser alive as long as it has
83+
* at least one child parser.
84+
*
85+
* See https://github.com/python/cpython/issues/139400 for details.
86+
*/
87+
PyObject *parent;
7988
int ordered_attributes; /* Return attributes as a list. */
8089
int specified_attributes; /* Report only specified attributes. */
8190
int in_callback; /* Is a callback active? */
@@ -1067,6 +1076,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10671076
return NULL;
10681077
}
10691078

1079+
// The new subparser will make use of the parent XML_Parser inside of Expat.
1080+
// So we need to take subparsers into account with the reference counting
1081+
// of their parent parser.
1082+
Py_INCREF(self);
1083+
10701084
new_parser->buffer_size = self->buffer_size;
10711085
new_parser->buffer_used = 0;
10721086
new_parser->buffer = NULL;
@@ -1076,18 +1090,21 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10761090
new_parser->ns_prefixes = self->ns_prefixes;
10771091
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
10781092
encoding);
1093+
new_parser->parent = (PyObject *)self;
10791094
new_parser->handlers = 0;
10801095
new_parser->intern = Py_XNewRef(self->intern);
10811096

10821097
if (self->buffer != NULL) {
10831098
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
10841099
if (new_parser->buffer == NULL) {
10851100
Py_DECREF(new_parser);
1101+
Py_DECREF(self);
10861102
return PyErr_NoMemory();
10871103
}
10881104
}
10891105
if (!new_parser->itself) {
10901106
Py_DECREF(new_parser);
1107+
Py_DECREF(self);
10911108
return PyErr_NoMemory();
10921109
}
10931110

@@ -1101,6 +1118,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
11011118
new_parser->handlers = PyMem_New(PyObject *, i);
11021119
if (!new_parser->handlers) {
11031120
Py_DECREF(new_parser);
1121+
Py_DECREF(self);
11041122
return PyErr_NoMemory();
11051123
}
11061124
clear_handlers(new_parser, 1);
@@ -1481,6 +1499,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
14811499
/* namespace_separator is either NULL or contains one char + \0 */
14821500
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
14831501
namespace_separator);
1502+
self->parent = NULL;
14841503
if (self->itself == NULL) {
14851504
PyErr_SetString(PyExc_RuntimeError,
14861505
"XML_ParserCreate failed");
@@ -1517,6 +1536,7 @@ xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
15171536
for (size_t i = 0; handler_info[i].name != NULL; i++) {
15181537
Py_VISIT(self->handlers[i]);
15191538
}
1539+
Py_VISIT(self->parent);
15201540
Py_VISIT(Py_TYPE(op));
15211541
return 0;
15221542
}
@@ -1527,6 +1547,10 @@ xmlparse_clear(PyObject *op)
15271547
xmlparseobject *self = xmlparseobject_CAST(op);
15281548
clear_handlers(self, 0);
15291549
Py_CLEAR(self->intern);
1550+
// NOTE: We cannot call Py_CLEAR(self->parent) prior to calling
1551+
// XML_ParserFree(self->itself), or a subparser could lose its parent
1552+
// XML_Parser while still making use of it internally.
1553+
// https://github.com/python/cpython/issues/139400
15301554
return 0;
15311555
}
15321556

@@ -1540,6 +1564,7 @@ xmlparse_dealloc(PyObject *op)
15401564
XML_ParserFree(self->itself);
15411565
}
15421566
self->itself = NULL;
1567+
Py_CLEAR(self->parent);
15431568

15441569
if (self->handlers != NULL) {
15451570
PyMem_Free(self->handlers);

0 commit comments

Comments
 (0)