Skip to content

Commit 66ca0d9

Browse files
[3.14] gh-139400: Make sure that parent parsers outlive their subparsers in pyexpat (GH-139403) (#139606)
Co-authored-by: Sebastian Pipping <[email protected]>
1 parent 08d2892 commit 66ca0d9

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

Lib/test/test_pyexpat.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,42 @@ def resolve_entity(context, base, system_id, public_id):
755755
self.assertEqual(handler_call_args, [("bar", "baz")])
756756

757757

758+
class ParentParserLifetimeTest(unittest.TestCase):
759+
"""
760+
Subparsers make use of their parent XML_Parser inside of Expat.
761+
As a result, parent parsers need to outlive subparsers.
762+
763+
See https://github.com/python/cpython/issues/139400.
764+
"""
765+
766+
def test_parent_parser_outlives_its_subparsers__single(self):
767+
parser = expat.ParserCreate()
768+
subparser = parser.ExternalEntityParserCreate(None)
769+
770+
# Now try to cause garbage collection of the parent parser
771+
# while it's still being referenced by a related subparser.
772+
del parser
773+
774+
def test_parent_parser_outlives_its_subparsers__multiple(self):
775+
parser = expat.ParserCreate()
776+
subparser_one = parser.ExternalEntityParserCreate(None)
777+
subparser_two = parser.ExternalEntityParserCreate(None)
778+
779+
# Now try to cause garbage collection of the parent parser
780+
# while it's still being referenced by a related subparser.
781+
del parser
782+
783+
def test_parent_parser_outlives_its_subparsers__chain(self):
784+
parser = expat.ParserCreate()
785+
subparser = parser.ExternalEntityParserCreate(None)
786+
subsubparser = subparser.ExternalEntityParserCreate(None)
787+
788+
# Now try to cause garbage collection of the parent parsers
789+
# while they are still being referenced by a related subparser.
790+
del parser
791+
del subparser
792+
793+
758794
class ReparseDeferralTest(unittest.TestCase):
759795
def test_getter_setter_round_trip(self):
760796
parser = expat.ParserCreate()
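(File name not captured for this section; the hunk below starts at `@@ -0,0`, so it is a newly added file, separate from Lib/test/test_pyexpat.py above.)

Lines changed: 4 additions & 0 deletions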
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
2+
garbage-collected once they are no longer referenced by subparsers created
3+
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
4+
Patch by Sebastian Pipping.

Modules/pyexpat.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@ typedef struct {
7474
PyObject_HEAD
7575

7676
XML_Parser itself;
77+
/*
78+
* Strong reference to a parent `xmlparseobject` if this parser
79+
* is a child parser. Set to NULL if this parser is a root parser.
80+
* This is needed to keep the parent parser alive as long as it has
81+
* at least one child parser.
82+
*
83+
* See https://github.com/python/cpython/issues/139400 for details.
84+
*/
85+
PyObject *parent;
7786
int ordered_attributes; /* Return attributes as a list. */
7887
int specified_attributes; /* Report only specified attributes. */
7988
int in_callback; /* Is a callback active? */
@@ -1019,6 +1028,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10191028
return NULL;
10201029
}
10211030

1031+
// The new subparser will make use of the parent XML_Parser inside of Expat.
1032+
// So we need to take subparsers into account with the reference counting
1033+
// of their parent parser.
1034+
Py_INCREF(self);
1035+
10221036
new_parser->buffer_size = self->buffer_size;
10231037
new_parser->buffer_used = 0;
10241038
new_parser->buffer = NULL;
@@ -1028,18 +1042,21 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10281042
new_parser->ns_prefixes = self->ns_prefixes;
10291043
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
10301044
encoding);
1045+
new_parser->parent = (PyObject *)self;
10311046
new_parser->handlers = 0;
10321047
new_parser->intern = Py_XNewRef(self->intern);
10331048

10341049
if (self->buffer != NULL) {
10351050
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
10361051
if (new_parser->buffer == NULL) {
10371052
Py_DECREF(new_parser);
1053+
Py_DECREF(self);
10381054
return PyErr_NoMemory();
10391055
}
10401056
}
10411057
if (!new_parser->itself) {
10421058
Py_DECREF(new_parser);
1059+
Py_DECREF(self);
10431060
return PyErr_NoMemory();
10441061
}
10451062

@@ -1053,6 +1070,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10531070
new_parser->handlers = PyMem_New(PyObject *, i);
10541071
if (!new_parser->handlers) {
10551072
Py_DECREF(new_parser);
1073+
Py_DECREF(self);
10561074
return PyErr_NoMemory();
10571075
}
10581076
clear_handlers(new_parser, 1);
@@ -1242,6 +1260,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
12421260
/* namespace_separator is either NULL or contains one char + \0 */
12431261
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
12441262
namespace_separator);
1263+
self->parent = NULL;
12451264
if (self->itself == NULL) {
12461265
PyErr_SetString(PyExc_RuntimeError,
12471266
"XML_ParserCreate failed");
@@ -1278,6 +1297,7 @@ xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
12781297
for (size_t i = 0; handler_info[i].name != NULL; i++) {
12791298
Py_VISIT(self->handlers[i]);
12801299
}
1300+
Py_VISIT(self->parent);
12811301
Py_VISIT(Py_TYPE(op));
12821302
return 0;
12831303
}
@@ -1288,6 +1308,10 @@ xmlparse_clear(PyObject *op)
12881308
xmlparseobject *self = xmlparseobject_CAST(op);
12891309
clear_handlers(self, 0);
12901310
Py_CLEAR(self->intern);
1311+
// NOTE: We cannot call Py_CLEAR(self->parent) prior to calling
1312+
// XML_ParserFree(self->itself), or a subparser could lose its parent
1313+
// XML_Parser while still making use of it internally.
1314+
// https://github.com/python/cpython/issues/139400
12911315
return 0;
12921316
}
12931317

@@ -1301,6 +1325,7 @@ xmlparse_dealloc(PyObject *op)
13011325
XML_ParserFree(self->itself);
13021326
}
13031327
self->itself = NULL;
1328+
Py_CLEAR(self->parent);
13041329

13051330
if (self->handlers != NULL) {
13061331
PyMem_Free(self->handlers);

0 commit comments

Comments
 (0)