Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,42 @@ def resolve_entity(context, base, system_id, public_id):
self.assertEqual(handler_call_args, [("bar", "baz")])


class ParentParserLifetimeTest(unittest.TestCase):
    """
    Lifetime checks for parsers that have external-entity subparsers.

    Inside of Expat, a subparser keeps using the XML_Parser of the parser
    it was created from, so every parent parser has to stay alive for as
    long as one of its subparsers exists.

    See https://github.com/python/cpython/issues/139400.
    """

    def test_parent_parser_outlives_its_subparsers__single(self):
        """Drop the only name bound to a parent that has one subparser."""
        parent = expat.ParserCreate()
        child = parent.ExternalEntityParserCreate(None)

        # Remove the local reference to the parent while ``child`` is
        # still bound; the test passes if nothing blows up here or when
        # ``child`` itself goes away at the end of the test.
        del parent

    def test_parent_parser_outlives_its_subparsers__multiple(self):
        """Drop the only name bound to a parent that has two subparsers."""
        parent = expat.ParserCreate()
        first_child = parent.ExternalEntityParserCreate(None)
        second_child = parent.ExternalEntityParserCreate(None)

        # Both children remain bound while the local reference to
        # their shared parent is removed.
        del parent

    def test_parent_parser_outlives_its_subparsers__chain(self):
        """Drop the names of a root parser and its child before the grandchild."""
        root = expat.ParserCreate()
        middle = root.ExternalEntityParserCreate(None)
        leaf = middle.ExternalEntityParserCreate(None)

        # Unbind the two ancestors, outermost first, while ``leaf`` is
        # still bound.
        del root, middle


class ReparseDeferralTest(unittest.TestCase):
def test_getter_setter_round_trip(self):
parser = expat.ParserCreate()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
garbage-collected once they are no longer referenced by subparsers created
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
Patch by Sebastian Pipping.
25 changes: 25 additions & 0 deletions Modules/pyexpat.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ typedef struct {
PyObject_HEAD

XML_Parser itself;
/*
* Strong reference to a parent `xmlparseobject` if this parser
* is a child parser. Set to NULL if this parser is a root parser.
* This is needed to keep the parent parser alive as long as it has
* at least one child parser.
*
* See https://github.com/python/cpython/issues/139400 for details.
*/
PyObject *parent;
int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */
Expand Down Expand Up @@ -1019,6 +1028,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
return NULL;
}

// The new subparser will make use of the parent XML_Parser inside of Expat.
// So we need to take subparsers into account with the reference counting
// of their parent parser.
Py_INCREF(self);

new_parser->buffer_size = self->buffer_size;
new_parser->buffer_used = 0;
new_parser->buffer = NULL;
Expand All @@ -1028,18 +1042,21 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->ns_prefixes = self->ns_prefixes;
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
encoding);
new_parser->parent = (PyObject *)self;
new_parser->handlers = 0;
new_parser->intern = Py_XNewRef(self->intern);

if (self->buffer != NULL) {
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
if (new_parser->buffer == NULL) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
}
if (!new_parser->itself) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}

Expand All @@ -1053,6 +1070,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->handlers = PyMem_New(PyObject *, i);
if (!new_parser->handlers) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
clear_handlers(new_parser, 1);
Expand Down Expand Up @@ -1242,6 +1260,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
/* namespace_separator is either NULL or contains one char + \0 */
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
namespace_separator);
self->parent = NULL;
if (self->itself == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"XML_ParserCreate failed");
Expand Down Expand Up @@ -1278,6 +1297,7 @@ xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
for (size_t i = 0; handler_info[i].name != NULL; i++) {
Py_VISIT(self->handlers[i]);
}
Py_VISIT(self->parent);
Py_VISIT(Py_TYPE(op));
return 0;
}
Expand All @@ -1288,6 +1308,10 @@ xmlparse_clear(PyObject *op)
xmlparseobject *self = xmlparseobject_CAST(op);
clear_handlers(self, 0);
Py_CLEAR(self->intern);
// NOTE: We cannot call Py_CLEAR(self->parent) prior to calling
// XML_ParserFree(self->itself), or a subparser could lose its parent
// XML_Parser while still making use of it internally.
// https://github.com/python/cpython/issues/139400
return 0;
}

Expand All @@ -1301,6 +1325,7 @@ xmlparse_dealloc(PyObject *op)
XML_ParserFree(self->itself);
}
self->itself = NULL;
Py_CLEAR(self->parent);

if (self->handlers != NULL) {
PyMem_Free(self->handlers);
Expand Down
Loading