Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,42 @@ def resolve_entity(context, base, system_id, public_id):
self.assertEqual(handler_call_args, [("bar", "baz")])


class ParentParserLifetimeTest(unittest.TestCase):
    """
    Subparsers make use of their parent XML_Parser inside of Expat.
    As a result, parent parsers need to outlive subparsers.

    See https://github.com/python/cpython/issues/139400.
    """

    # None of the tests below contains an assertion.  Each one only creates
    # parsers and then drops the local name of the parent(s) while a
    # descendant parser is still bound to a local name.
    # NOTE(review): presumably a regression would surface as a crash or a
    # memory-checker report rather than as a test failure -- confirm.

    def test_parent_parser_outlives_its_subparsers__single(self):
        # One parent, one subparser.
        parser = expat.ParserCreate()
        # Kept bound to a local name so that the subparser is still
        # referenced after the `del parser` statement below.
        subparser = parser.ExternalEntityParserCreate(None)

        # Now try to cause garbage collection of the parent parser
        # while it's still being referenced by a related subparser.
        del parser

    def test_parent_parser_outlives_its_subparsers__multiple(self):
        # One parent, two sibling subparsers created from the same parent.
        parser = expat.ParserCreate()
        subparser_one = parser.ExternalEntityParserCreate(None)
        subparser_two = parser.ExternalEntityParserCreate(None)

        # Now try to cause garbage collection of the parent parser
        # while it's still being referenced by related subparsers.
        del parser

    def test_parent_parser_outlives_its_subparsers__chain(self):
        # Three generations: parser -> subparser -> subsubparser.
        parser = expat.ParserCreate()
        subparser = parser.ExternalEntityParserCreate(None)
        subsubparser = subparser.ExternalEntityParserCreate(None)

        # Now try to cause garbage collection of the parent parsers
        # while they are still being referenced by a related subparser.
        # The names are dropped root first; only `subsubparser` remains
        # bound afterwards.
        del parser
        del subparser


class ReparseDeferralTest(unittest.TestCase):
def test_getter_setter_round_trip(self):
parser = expat.ParserCreate()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
garbage-collected once they are no longer referenced by subparsers created
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
Patch by Sebastian Pipping.
27 changes: 26 additions & 1 deletion Modules/pyexpat.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ typedef struct {
PyObject_HEAD

XML_Parser itself;
/*
* Strong reference to a parent `xmlparseobject` if this parser
* is a child parser. Set to NULL if this parser is a root parser.
* This is needed to keep the parent parser alive as long as it has
* at least one child parser.
*
* See https://github.com/python/cpython/issues/139400 for details.
*/
PyObject *parent;
int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */
Expand Down Expand Up @@ -967,6 +976,12 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
if (new_parser == NULL)
return NULL;

// The new subparser will make use of the parent XML_Parser inside of Expat.
// So we need to take subparsers into account with the reference counting
// of their parent parser.
Py_INCREF(self);

new_parser->buffer_size = self->buffer_size;
new_parser->buffer_used = 0;
new_parser->buffer = NULL;
Expand All @@ -976,6 +991,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->ns_prefixes = self->ns_prefixes;
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
encoding);
new_parser->parent = (PyObject *)self;
new_parser->handlers = 0;
new_parser->intern = self->intern;
Py_XINCREF(new_parser->intern);
Expand All @@ -984,11 +1000,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
if (new_parser->buffer == NULL) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
}
if (!new_parser->itself) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}

Expand All @@ -1001,6 +1019,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->handlers = PyMem_New(PyObject *, i);
if (!new_parser->handlers) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
clear_handlers(new_parser, 1);
Expand Down Expand Up @@ -1175,6 +1194,7 @@ newxmlparseobject(const char *encoding, const char *namespace_separator, PyObjec
/* namespace_separator is either NULL or contains one char + \0 */
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
namespace_separator);
self->parent = NULL;
if (self->itself == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"XML_ParserCreate failed");
Expand Down Expand Up @@ -1204,7 +1224,6 @@ newxmlparseobject(const char *encoding, const char *namespace_separator, PyObjec
return (PyObject*)self;
}


static void
xmlparse_dealloc(xmlparseobject *self)
{
Expand All @@ -1213,6 +1232,7 @@ xmlparse_dealloc(xmlparseobject *self)
if (self->itself != NULL)
XML_ParserFree(self->itself);
self->itself = NULL;
Py_CLEAR(self->parent);

if (self->handlers != NULL) {
for (i = 0; handler_info[i].name != NULL; i++)
Expand Down Expand Up @@ -1499,6 +1519,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
int i;
for (i = 0; handler_info[i].name != NULL; i++)
Py_VISIT(op->handlers[i]);
Py_VISIT(op->parent);
Copy link
Member

@picnixz picnixz Oct 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: if the type is a static type, then it's fine not to visit Py_TYPE(op). Here it's therefore correct because the type is a static type and not a heap type.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@picnixz I'm assuming you are referring to static PyTypeObject Xmlparsetype here? Good to know, thanks for checking!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this one. Static types are now only used for built-in types such as exceptions, numbers, and standard objects. Types in extension modules are now mostly heap types.

return 0;
}

Expand All @@ -1507,6 +1528,10 @@ xmlparse_clear(xmlparseobject *op)
{
clear_handlers(op, 0);
Py_CLEAR(op->intern);
// NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
// XML_ParserFree(op->itself), or a subparser could lose its parent
// XML_Parser while still making use of it internally.
// https://github.com/python/cpython/issues/139400
return 0;
}

Expand Down
Loading