Skip to content

Commit 69ab8fb

Browse files
authored
[3.13] pythongh-139400: Make sure that parent parsers outlive their subparsers in pyexpat (pythonGH-139403) (pythonGH-139608)
Within libexpat, a parser created via `XML_ExternalEntityParserCreate` is relying on its parent parser throughout its entire lifetime. Prior to this fix, is was possible for the parent parser to be garbage-collected too early. (cherry picked from commit 6edb2dd)
1 parent 1d39dba commit 69ab8fb

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

Lib/test/test_pyexpat.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,42 @@ def resolve_entity(context, base, system_id, public_id):
755755
self.assertEqual(handler_call_args, [("bar", "baz")])
756756

757757

758+
class ParentParserLifetimeTest(unittest.TestCase):
759+
"""
760+
Subparsers make use of their parent XML_Parser inside of Expat.
761+
As a result, parent parsers need to outlive subparsers.
762+
763+
See https://github.com/python/cpython/issues/139400.
764+
"""
765+
766+
def test_parent_parser_outlives_its_subparsers__single(self):
767+
parser = expat.ParserCreate()
768+
subparser = parser.ExternalEntityParserCreate(None)
769+
770+
# Now try to cause garbage collection of the parent parser
771+
# while it's still being referenced by a related subparser.
772+
del parser
773+
774+
def test_parent_parser_outlives_its_subparsers__multiple(self):
775+
parser = expat.ParserCreate()
776+
subparser_one = parser.ExternalEntityParserCreate(None)
777+
subparser_two = parser.ExternalEntityParserCreate(None)
778+
779+
# Now try to cause garbage collection of the parent parser
780+
# while it's still being referenced by a related subparser.
781+
del parser
782+
783+
def test_parent_parser_outlives_its_subparsers__chain(self):
784+
parser = expat.ParserCreate()
785+
subparser = parser.ExternalEntityParserCreate(None)
786+
subsubparser = subparser.ExternalEntityParserCreate(None)
787+
788+
# Now try to cause garbage collection of the parent parsers
789+
# while they are still being referenced by a related subparser.
790+
del parser
791+
del subparser
792+
793+
758794
class ReparseDeferralTest(unittest.TestCase):
759795
def test_getter_setter_round_trip(self):
760796
parser = expat.ParserCreate()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
2+
garbage-collected once they are no longer referenced by subparsers created
3+
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
4+
Patch by Sebastian Pipping.

Modules/pyexpat.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@ typedef struct {
7474
PyObject_HEAD
7575

7676
XML_Parser itself;
77+
/*
78+
* Strong reference to a parent `xmlparseobject` if this parser
79+
* is a child parser. Set to NULL if this parser is a root parser.
80+
* This is needed to keep the parent parser alive as long as it has
81+
* at least one child parser.
82+
*
83+
* See https://github.com/python/cpython/issues/139400 for details.
84+
*/
85+
PyObject *parent;
7786
int ordered_attributes; /* Return attributes as a list. */
7887
int specified_attributes; /* Report only specified attributes. */
7988
int in_callback; /* Is a callback active? */
@@ -988,6 +997,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
988997
return NULL;
989998
}
990999

1000+
// The new subparser will make use of the parent XML_Parser inside of Expat.
1001+
// So we need to take subparsers into account with the reference counting
1002+
// of their parent parser.
1003+
Py_INCREF(self);
1004+
9911005
new_parser->buffer_size = self->buffer_size;
9921006
new_parser->buffer_used = 0;
9931007
new_parser->buffer = NULL;
@@ -997,18 +1011,21 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
9971011
new_parser->ns_prefixes = self->ns_prefixes;
9981012
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
9991013
encoding);
1014+
new_parser->parent = (PyObject *)self;
10001015
new_parser->handlers = 0;
10011016
new_parser->intern = Py_XNewRef(self->intern);
10021017

10031018
if (self->buffer != NULL) {
10041019
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
10051020
if (new_parser->buffer == NULL) {
10061021
Py_DECREF(new_parser);
1022+
Py_DECREF(self);
10071023
return PyErr_NoMemory();
10081024
}
10091025
}
10101026
if (!new_parser->itself) {
10111027
Py_DECREF(new_parser);
1028+
Py_DECREF(self);
10121029
return PyErr_NoMemory();
10131030
}
10141031

@@ -1021,6 +1038,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10211038
new_parser->handlers = PyMem_New(PyObject *, i);
10221039
if (!new_parser->handlers) {
10231040
Py_DECREF(new_parser);
1041+
Py_DECREF(self);
10241042
return PyErr_NoMemory();
10251043
}
10261044
clear_handlers(new_parser, 1);
@@ -1210,6 +1228,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
12101228
/* namespace_separator is either NULL or contains one char + \0 */
12111229
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
12121230
namespace_separator);
1231+
self->parent = NULL;
12131232
if (self->itself == NULL) {
12141233
PyErr_SetString(PyExc_RuntimeError,
12151234
"XML_ParserCreate failed");
@@ -1245,6 +1264,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
12451264
for (int i = 0; handler_info[i].name != NULL; i++) {
12461265
Py_VISIT(op->handlers[i]);
12471266
}
1267+
Py_VISIT(op->parent);
12481268
Py_VISIT(Py_TYPE(op));
12491269
return 0;
12501270
}
@@ -1254,6 +1274,10 @@ xmlparse_clear(xmlparseobject *op)
12541274
{
12551275
clear_handlers(op, 0);
12561276
Py_CLEAR(op->intern);
1277+
// NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
1278+
// XML_ParserFree(op->itself), or a subparser could lose its parent
1279+
// XML_Parser while still making use of it internally.
1280+
// https://github.com/python/cpython/issues/139400
12571281
return 0;
12581282
}
12591283

@@ -1265,6 +1289,7 @@ xmlparse_dealloc(xmlparseobject *self)
12651289
if (self->itself != NULL)
12661290
XML_ParserFree(self->itself);
12671291
self->itself = NULL;
1292+
Py_CLEAR(self->parent);
12681293

12691294
if (self->handlers != NULL) {
12701295
PyMem_Free(self->handlers);

0 commit comments

Comments
 (0)