Skip to content

Commit 3334e29

Browse files
committed
pythongh-139400: Make sure that parent parsers outlive their subparsers in pyexpat (python#139403)
* Modules/pyexpat.c: Disallow collection of in-use parent parsers. Within libexpat, a parser created via `XML_ExternalEntityParserCreate` is relying on its parent parser throughout its entire lifetime. Prior to this fix, is was possible for the parent parser to be garbage-collected too early. (cherry picked from commit 6edb2dd)
1 parent 4dea0fb commit 3334e29

File tree

3 files changed

+66
-1
lines changed

3 files changed

+66
-1
lines changed

Lib/test/test_pyexpat.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,42 @@ def resolve_entity(context, base, system_id, public_id):
730730
self.assertEqual(handler_call_args, [("bar", "baz")])
731731

732732

733+
class ParentParserLifetimeTest(unittest.TestCase):
734+
"""
735+
Subparsers make use of their parent XML_Parser inside of Expat.
736+
As a result, parent parsers need to outlive subparsers.
737+
738+
See https://github.com/python/cpython/issues/139400.
739+
"""
740+
741+
def test_parent_parser_outlives_its_subparsers__single(self):
742+
parser = expat.ParserCreate()
743+
subparser = parser.ExternalEntityParserCreate(None)
744+
745+
# Now try to cause garbage collection of the parent parser
746+
# while it's still being referenced by a related subparser.
747+
del parser
748+
749+
def test_parent_parser_outlives_its_subparsers__multiple(self):
750+
parser = expat.ParserCreate()
751+
subparser_one = parser.ExternalEntityParserCreate(None)
752+
subparser_two = parser.ExternalEntityParserCreate(None)
753+
754+
# Now try to cause garbage collection of the parent parser
755+
# while it's still being referenced by a related subparser.
756+
del parser
757+
758+
def test_parent_parser_outlives_its_subparsers__chain(self):
759+
parser = expat.ParserCreate()
760+
subparser = parser.ExternalEntityParserCreate(None)
761+
subsubparser = subparser.ExternalEntityParserCreate(None)
762+
763+
# Now try to cause garbage collection of the parent parsers
764+
# while they are still being referenced by a related subparser.
765+
del parser
766+
del subparser
767+
768+
733769
class ReparseDeferralTest(unittest.TestCase):
734770
def test_getter_setter_round_trip(self):
735771
parser = expat.ParserCreate()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
2+
garbage-collected once they are no longer referenced by subparsers created
3+
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
4+
Patch by Sebastian Pipping.

Modules/pyexpat.c

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@ typedef struct {
5858
PyObject_HEAD
5959

6060
XML_Parser itself;
61+
/*
62+
* Strong reference to a parent `xmlparseobject` if this parser
63+
* is a child parser. Set to NULL if this parser is a root parser.
64+
* This is needed to keep the parent parser alive as long as it has
65+
* at least one child parser.
66+
*
67+
* See https://github.com/python/cpython/issues/139400 for details.
68+
*/
69+
PyObject *parent;
6170
int ordered_attributes; /* Return attributes as a list. */
6271
int specified_attributes; /* Report only specified attributes. */
6372
int in_callback; /* Is a callback active? */
@@ -967,6 +976,12 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
967976
new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
968977
if (new_parser == NULL)
969978
return NULL;
979+
980+
// The new subparser will make use of the parent XML_Parser inside of Expat.
981+
// So we need to take subparsers into account with the reference counting
982+
// of their parent parser.
983+
Py_INCREF(self);
984+
970985
new_parser->buffer_size = self->buffer_size;
971986
new_parser->buffer_used = 0;
972987
new_parser->buffer = NULL;
@@ -976,6 +991,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
976991
new_parser->ns_prefixes = self->ns_prefixes;
977992
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
978993
encoding);
994+
new_parser->parent = (PyObject *)self;
979995
new_parser->handlers = 0;
980996
new_parser->intern = self->intern;
981997
Py_XINCREF(new_parser->intern);
@@ -984,11 +1000,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
9841000
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
9851001
if (new_parser->buffer == NULL) {
9861002
Py_DECREF(new_parser);
1003+
Py_DECREF(self);
9871004
return PyErr_NoMemory();
9881005
}
9891006
}
9901007
if (!new_parser->itself) {
9911008
Py_DECREF(new_parser);
1009+
Py_DECREF(self);
9921010
return PyErr_NoMemory();
9931011
}
9941012

@@ -1001,6 +1019,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10011019
new_parser->handlers = PyMem_New(PyObject *, i);
10021020
if (!new_parser->handlers) {
10031021
Py_DECREF(new_parser);
1022+
Py_DECREF(self);
10041023
return PyErr_NoMemory();
10051024
}
10061025
clear_handlers(new_parser, 1);
@@ -1175,6 +1194,7 @@ newxmlparseobject(const char *encoding, const char *namespace_separator, PyObjec
11751194
/* namespace_separator is either NULL or contains one char + \0 */
11761195
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
11771196
namespace_separator);
1197+
self->parent = NULL;
11781198
if (self->itself == NULL) {
11791199
PyErr_SetString(PyExc_RuntimeError,
11801200
"XML_ParserCreate failed");
@@ -1204,7 +1224,6 @@ newxmlparseobject(const char *encoding, const char *namespace_separator, PyObjec
12041224
return (PyObject*)self;
12051225
}
12061226

1207-
12081227
static void
12091228
xmlparse_dealloc(xmlparseobject *self)
12101229
{
@@ -1213,6 +1232,7 @@ xmlparse_dealloc(xmlparseobject *self)
12131232
if (self->itself != NULL)
12141233
XML_ParserFree(self->itself);
12151234
self->itself = NULL;
1235+
Py_CLEAR(self->parent);
12161236

12171237
if (self->handlers != NULL) {
12181238
for (i = 0; handler_info[i].name != NULL; i++)
@@ -1499,6 +1519,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
14991519
int i;
15001520
for (i = 0; handler_info[i].name != NULL; i++)
15011521
Py_VISIT(op->handlers[i]);
1522+
Py_VISIT(op->parent);
15021523
return 0;
15031524
}
15041525

@@ -1507,6 +1528,10 @@ xmlparse_clear(xmlparseobject *op)
15071528
{
15081529
clear_handlers(op, 0);
15091530
Py_CLEAR(op->intern);
1531+
// NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
1532+
// XML_ParserFree(op->itself), or a subparser could lose its parent
1533+
// XML_Parser while still making use of it internally.
1534+
// https://github.com/python/cpython/issues/139400
15101535
return 0;
15111536
}
15121537

0 commit comments

Comments
 (0)