@@ -74,6 +74,15 @@ typedef struct {
7474 PyObject_HEAD
7575
7676 XML_Parser itself ;
77+ /*
78+ * Strong reference to a parent `xmlparseobject` if this parser
79+ * is a child parser. Set to NULL if this parser is a root parser.
80+ * This is needed to keep the parent parser alive as long as it has
81+ * at least one child parser.
82+ *
83+ * See https://github.com/python/cpython/issues/139400 for details.
84+ */
85+ PyObject * parent ;
7786 int ordered_attributes ; /* Return attributes as a list. */
7887 int specified_attributes ; /* Report only specified attributes. */
7988 int in_callback ; /* Is a callback active? */
@@ -1019,6 +1028,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10191028 return NULL ;
10201029 }
10211030
1031+ // The new subparser keeps using the parent XML_Parser inside of Expat, so
1032+ // it owns a strong reference to its parent: stored in new_parser->parent
1033+ // below and released by xmlparse_dealloc() only after XML_ParserFree().
1034+ Py_INCREF (self );
1035+
10221036 new_parser -> buffer_size = self -> buffer_size ;
10231037 new_parser -> buffer_used = 0 ;
10241038 new_parser -> buffer = NULL ;
@@ -1028,18 +1042,21 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10281042 new_parser -> ns_prefixes = self -> ns_prefixes ;
10291043 new_parser -> itself = XML_ExternalEntityParserCreate (self -> itself , context ,
10301044 encoding );
1045+ new_parser -> parent = (PyObject * )self ;
10311046 new_parser -> handlers = 0 ;
10321047 new_parser -> intern = Py_XNewRef (self -> intern );
10331048
10341049 if (self -> buffer != NULL ) {
10351050 new_parser -> buffer = PyMem_Malloc (new_parser -> buffer_size );
10361051 if (new_parser -> buffer == NULL ) {
10371052 Py_DECREF (new_parser );
1053+ Py_DECREF (self );
10381054 return PyErr_NoMemory ();
10391055 }
10401056 }
10411057 if (!new_parser -> itself ) {
10421058 Py_DECREF (new_parser );
1059+ Py_DECREF (self );
10431060 return PyErr_NoMemory ();
10441061 }
10451062
@@ -1053,6 +1070,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10531070 new_parser -> handlers = PyMem_New (PyObject * , i );
10541071 if (!new_parser -> handlers ) {
10551072 Py_DECREF (new_parser );
1073+ Py_DECREF (self );
10561074 return PyErr_NoMemory ();
10571075 }
10581076 clear_handlers (new_parser , 1 );
@@ -1242,6 +1260,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
12421260 /* namespace_separator is either NULL or contains one char + \0 */
12431261 self -> itself = XML_ParserCreate_MM (encoding , & ExpatMemoryHandler ,
12441262 namespace_separator );
1263+ self -> parent = NULL ;
12451264 if (self -> itself == NULL ) {
12461265 PyErr_SetString (PyExc_RuntimeError ,
12471266 "XML_ParserCreate failed" );
@@ -1278,6 +1297,7 @@ xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
12781297 for (size_t i = 0 ; handler_info [i ].name != NULL ; i ++ ) {
12791298 Py_VISIT (self -> handlers [i ]);
12801299 }
1300+ Py_VISIT (self -> parent );
12811301 Py_VISIT (Py_TYPE (op ));
12821302 return 0 ;
12831303}
@@ -1288,6 +1308,10 @@ xmlparse_clear(PyObject *op)
12881308 xmlparseobject * self = xmlparseobject_CAST (op );
12891309 clear_handlers (self , 0 );
12901310 Py_CLEAR (self -> intern );
1311+ // NOTE: We cannot call Py_CLEAR(self->parent) prior to calling
1312+ // XML_ParserFree(self->itself), or a subparser could lose its parent
1313+ // XML_Parser while still making use of it internally.
1314+ // https://github.com/python/cpython/issues/139400
12911315 return 0 ;
12921316}
12931317
@@ -1301,6 +1325,7 @@ xmlparse_dealloc(PyObject *op)
13011325 XML_ParserFree (self -> itself );
13021326 }
13031327 self -> itself = NULL ;
1328+ Py_CLEAR (self -> parent );
13041329
13051330 if (self -> handlers != NULL ) {
13061331 PyMem_Free (self -> handlers );
0 commit comments