Skip to content

Commit 37f71cb

Browse files
committed
pythongh-139400: Make sure that parent parsers outlive their subparsers in pyexpat (python#139403)
* Modules/pyexpat.c: Disallow collection of in-use parent parsers. Within libexpat, a parser created via `XML_ExternalEntityParserCreate` is relying on its parent parser throughout its entire lifetime. Prior to this fix, is was possible for the parent parser to be garbage-collected too early. (cherry picked from commit 6edb2dd)
1 parent 8adac49 commit 37f71cb

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

Lib/test/test_pyexpat.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,42 @@ def resolve_entity(context, base, system_id, public_id):
758758
self.assertEqual(handler_call_args, [("bar", "baz")])
759759

760760

761+
class ParentParserLifetimeTest(unittest.TestCase):
762+
"""
763+
Subparsers make use of their parent XML_Parser inside of Expat.
764+
As a result, parent parsers need to outlive subparsers.
765+
766+
See https://github.com/python/cpython/issues/139400.
767+
"""
768+
769+
def test_parent_parser_outlives_its_subparsers__single(self):
770+
parser = expat.ParserCreate()
771+
subparser = parser.ExternalEntityParserCreate(None)
772+
773+
# Now try to cause garbage collection of the parent parser
774+
# while it's still being referenced by a related subparser.
775+
del parser
776+
777+
def test_parent_parser_outlives_its_subparsers__multiple(self):
778+
parser = expat.ParserCreate()
779+
subparser_one = parser.ExternalEntityParserCreate(None)
780+
subparser_two = parser.ExternalEntityParserCreate(None)
781+
782+
# Now try to cause garbage collection of the parent parser
783+
# while it's still being referenced by a related subparser.
784+
del parser
785+
786+
def test_parent_parser_outlives_its_subparsers__chain(self):
787+
parser = expat.ParserCreate()
788+
subparser = parser.ExternalEntityParserCreate(None)
789+
subsubparser = subparser.ExternalEntityParserCreate(None)
790+
791+
# Now try to cause garbage collection of the parent parsers
792+
# while they are still being referenced by a related subparser.
793+
del parser
794+
del subparser
795+
796+
761797
class ReparseDeferralTest(unittest.TestCase):
762798
def test_getter_setter_round_trip(self):
763799
parser = expat.ParserCreate()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
2+
garbage-collected once they are no longer referenced by subparsers created
3+
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
4+
Patch by Sebastian Pipping.

Modules/pyexpat.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,15 @@ typedef struct {
6969
PyObject_HEAD
7070

7171
XML_Parser itself;
72+
/*
73+
* Strong reference to a parent `xmlparseobject` if this parser
74+
* is a child parser. Set to NULL if this parser is a root parser.
75+
* This is needed to keep the parent parser alive as long as it has
76+
* at least one child parser.
77+
*
78+
* See https://github.com/python/cpython/issues/139400 for details.
79+
*/
80+
PyObject *parent;
7281
int ordered_attributes; /* Return attributes as a list. */
7382
int specified_attributes; /* Report only specified attributes. */
7483
int in_callback; /* Is a callback active? */
@@ -990,6 +999,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
990999
return NULL;
9911000
}
9921001

1002+
// The new subparser will make use of the parent XML_Parser inside of Expat.
1003+
// So we need to take subparsers into account with the reference counting
1004+
// of their parent parser.
1005+
Py_INCREF(self);
1006+
9931007
new_parser->buffer_size = self->buffer_size;
9941008
new_parser->buffer_used = 0;
9951009
new_parser->buffer = NULL;
@@ -999,6 +1013,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
9991013
new_parser->ns_prefixes = self->ns_prefixes;
10001014
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
10011015
encoding);
1016+
new_parser->parent = (PyObject *)self;
10021017
new_parser->handlers = 0;
10031018
new_parser->intern = self->intern;
10041019
Py_XINCREF(new_parser->intern);
@@ -1007,11 +1022,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10071022
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
10081023
if (new_parser->buffer == NULL) {
10091024
Py_DECREF(new_parser);
1025+
Py_DECREF(self);
10101026
return PyErr_NoMemory();
10111027
}
10121028
}
10131029
if (!new_parser->itself) {
10141030
Py_DECREF(new_parser);
1031+
Py_DECREF(self);
10151032
return PyErr_NoMemory();
10161033
}
10171034

@@ -1024,6 +1041,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10241041
new_parser->handlers = PyMem_New(PyObject *, i);
10251042
if (!new_parser->handlers) {
10261043
Py_DECREF(new_parser);
1044+
Py_DECREF(self);
10271045
return PyErr_NoMemory();
10281046
}
10291047
clear_handlers(new_parser, 1);
@@ -1202,6 +1220,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
12021220
/* namespace_separator is either NULL or contains one char + \0 */
12031221
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
12041222
namespace_separator);
1223+
self->parent = NULL;
12051224
if (self->itself == NULL) {
12061225
PyErr_SetString(PyExc_RuntimeError,
12071226
"XML_ParserCreate failed");
@@ -1237,6 +1256,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
12371256
for (int i = 0; handler_info[i].name != NULL; i++) {
12381257
Py_VISIT(op->handlers[i]);
12391258
}
1259+
Py_VISIT(op->parent);
12401260
Py_VISIT(Py_TYPE(op));
12411261
return 0;
12421262
}
@@ -1246,6 +1266,10 @@ xmlparse_clear(xmlparseobject *op)
12461266
{
12471267
clear_handlers(op, 0);
12481268
Py_CLEAR(op->intern);
1269+
// NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
1270+
// XML_ParserFree(op->itself), or a subparser could lose its parent
1271+
// XML_Parser while still making use of it internally.
1272+
// https://github.com/python/cpython/issues/139400
12491273
return 0;
12501274
}
12511275

@@ -1257,6 +1281,7 @@ xmlparse_dealloc(xmlparseobject *self)
12571281
if (self->itself != NULL)
12581282
XML_ParserFree(self->itself);
12591283
self->itself = NULL;
1284+
Py_CLEAR(self->parent);
12601285

12611286
if (self->handlers != NULL) {
12621287
PyMem_Free(self->handlers);

0 commit comments

Comments
 (0)