Skip to content

Commit 135a66e

Browse files
committed
pythongh-139400: Make sure that parent parsers outlive their subparsers in pyexpat (python#139403)
* Modules/pyexpat.c: Disallow collection of in-use parent parsers. Within libexpat, a parser created via `XML_ExternalEntityParserCreate` is relying on its parent parser throughout its entire lifetime. Prior to this fix, is was possible for the parent parser to be garbage-collected too early. (cherry picked from commit 6edb2dd)
1 parent f610f9e commit 135a66e

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

Lib/test/test_pyexpat.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,42 @@ def resolve_entity(context, base, system_id, public_id):
730730
self.assertEqual(handler_call_args, [("bar", "baz")])
731731

732732

733+
class ParentParserLifetimeTest(unittest.TestCase):
734+
"""
735+
Subparsers make use of their parent XML_Parser inside of Expat.
736+
As a result, parent parsers need to outlive subparsers.
737+
738+
See https://github.com/python/cpython/issues/139400.
739+
"""
740+
741+
def test_parent_parser_outlives_its_subparsers__single(self):
742+
parser = expat.ParserCreate()
743+
subparser = parser.ExternalEntityParserCreate(None)
744+
745+
# Now try to cause garbage collection of the parent parser
746+
# while it's still being referenced by a related subparser.
747+
del parser
748+
749+
def test_parent_parser_outlives_its_subparsers__multiple(self):
750+
parser = expat.ParserCreate()
751+
subparser_one = parser.ExternalEntityParserCreate(None)
752+
subparser_two = parser.ExternalEntityParserCreate(None)
753+
754+
# Now try to cause garbage collection of the parent parser
755+
# while it's still being referenced by a related subparser.
756+
del parser
757+
758+
def test_parent_parser_outlives_its_subparsers__chain(self):
759+
parser = expat.ParserCreate()
760+
subparser = parser.ExternalEntityParserCreate(None)
761+
subsubparser = subparser.ExternalEntityParserCreate(None)
762+
763+
# Now try to cause garbage collection of the parent parsers
764+
# while they are still being referenced by a related subparser.
765+
del parser
766+
del subparser
767+
768+
733769
class ReparseDeferralTest(unittest.TestCase):
734770
def test_getter_setter_round_trip(self):
735771
parser = expat.ParserCreate()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
2+
garbage-collected once they are no longer referenced by subparsers created
3+
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
4+
Patch by Sebastian Pipping.

Modules/pyexpat.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,15 @@ typedef struct {
6969
PyObject_HEAD
7070

7171
XML_Parser itself;
72+
/*
73+
* Strong reference to a parent `xmlparseobject` if this parser
74+
* is a child parser. Set to NULL if this parser is a root parser.
75+
* This is needed to keep the parent parser alive as long as it has
76+
* at least one child parser.
77+
*
78+
* See https://github.com/python/cpython/issues/139400 for details.
79+
*/
80+
PyObject *parent;
7281
int ordered_attributes; /* Return attributes as a list. */
7382
int specified_attributes; /* Report only specified attributes. */
7483
int in_callback; /* Is a callback active? */
@@ -989,6 +998,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
989998
return NULL;
990999
}
9911000

1001+
// The new subparser will make use of the parent XML_Parser inside of Expat.
1002+
// So we need to take subparsers into account with the reference counting
1003+
// of their parent parser.
1004+
Py_INCREF(self);
1005+
9921006
new_parser->buffer_size = self->buffer_size;
9931007
new_parser->buffer_used = 0;
9941008
new_parser->buffer = NULL;
@@ -998,6 +1012,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
9981012
new_parser->ns_prefixes = self->ns_prefixes;
9991013
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
10001014
encoding);
1015+
new_parser->parent = (PyObject *)self;
10011016
new_parser->handlers = 0;
10021017
new_parser->intern = self->intern;
10031018
Py_XINCREF(new_parser->intern);
@@ -1006,11 +1021,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10061021
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
10071022
if (new_parser->buffer == NULL) {
10081023
Py_DECREF(new_parser);
1024+
Py_DECREF(self);
10091025
return PyErr_NoMemory();
10101026
}
10111027
}
10121028
if (!new_parser->itself) {
10131029
Py_DECREF(new_parser);
1030+
Py_DECREF(self);
10141031
return PyErr_NoMemory();
10151032
}
10161033

@@ -1023,6 +1040,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10231040
new_parser->handlers = PyMem_New(PyObject *, i);
10241041
if (!new_parser->handlers) {
10251042
Py_DECREF(new_parser);
1043+
Py_DECREF(self);
10261044
return PyErr_NoMemory();
10271045
}
10281046
clear_handlers(new_parser, 1);
@@ -1201,6 +1219,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
12011219
/* namespace_separator is either NULL or contains one char + \0 */
12021220
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
12031221
namespace_separator);
1222+
self->parent = NULL;
12041223
if (self->itself == NULL) {
12051224
PyErr_SetString(PyExc_RuntimeError,
12061225
"XML_ParserCreate failed");
@@ -1236,6 +1255,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
12361255
for (int i = 0; handler_info[i].name != NULL; i++) {
12371256
Py_VISIT(op->handlers[i]);
12381257
}
1258+
Py_VISIT(op->parent);
12391259
Py_VISIT(Py_TYPE(op));
12401260
return 0;
12411261
}
@@ -1245,6 +1265,10 @@ xmlparse_clear(xmlparseobject *op)
12451265
{
12461266
clear_handlers(op, 0);
12471267
Py_CLEAR(op->intern);
1268+
// NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
1269+
// XML_ParserFree(op->itself), or a subparser could lose its parent
1270+
// XML_Parser while still making use of it internally.
1271+
// https://github.com/python/cpython/issues/139400
12481272
return 0;
12491273
}
12501274

@@ -1256,6 +1280,7 @@ xmlparse_dealloc(xmlparseobject *self)
12561280
if (self->itself != NULL)
12571281
XML_ParserFree(self->itself);
12581282
self->itself = NULL;
1283+
Py_CLEAR(self->parent);
12591284

12601285
if (self->handlers != NULL) {
12611286
PyMem_Free(self->handlers);

0 commit comments

Comments
 (0)