@@ -74,6 +74,15 @@ typedef struct {
7474 PyObject_HEAD
7575
7676 XML_Parser itself ;
77+ /*
78+ * Strong reference to a parent `xmlparseobject` if this parser
79+ * is a child parser. Set to NULL if this parser is a root parser.
80+ * This is needed to keep the parent parser alive as long as it has
81+ * at least one child parser.
82+ *
83+ * See https://github.com/python/cpython/issues/139400 for details.
84+ */
85+ PyObject * parent ;
7786 int ordered_attributes ; /* Return attributes as a list. */
7887 int specified_attributes ; /* Report only specified attributes. */
7988 int in_callback ; /* Is a callback active? */
@@ -988,6 +997,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
988997 return NULL ;
989998 }
990999
1000+ // The new subparser will make use of the parent XML_Parser inside of Expat.
1001+ // So we need to take subparsers into account with the reference counting
1002+ // of their parent parser.
1003+ Py_INCREF (self );
1004+
9911005 new_parser -> buffer_size = self -> buffer_size ;
9921006 new_parser -> buffer_used = 0 ;
9931007 new_parser -> buffer = NULL ;
@@ -997,18 +1011,21 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
9971011 new_parser -> ns_prefixes = self -> ns_prefixes ;
9981012 new_parser -> itself = XML_ExternalEntityParserCreate (self -> itself , context ,
9991013 encoding );
1014+ new_parser -> parent = (PyObject * )self ;
10001015 new_parser -> handlers = 0 ;
10011016 new_parser -> intern = Py_XNewRef (self -> intern );
10021017
10031018 if (self -> buffer != NULL ) {
10041019 new_parser -> buffer = PyMem_Malloc (new_parser -> buffer_size );
10051020 if (new_parser -> buffer == NULL ) {
10061021 Py_DECREF (new_parser );
1022+ Py_DECREF (self );
10071023 return PyErr_NoMemory ();
10081024 }
10091025 }
10101026 if (!new_parser -> itself ) {
10111027 Py_DECREF (new_parser );
1028+ Py_DECREF (self );
10121029 return PyErr_NoMemory ();
10131030 }
10141031
@@ -1021,6 +1038,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10211038 new_parser -> handlers = PyMem_New (PyObject * , i );
10221039 if (!new_parser -> handlers ) {
10231040 Py_DECREF (new_parser );
1041+ Py_DECREF (self );
10241042 return PyErr_NoMemory ();
10251043 }
10261044 clear_handlers (new_parser , 1 );
@@ -1210,6 +1228,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
12101228 /* namespace_separator is either NULL or contains one char + \0 */
12111229 self -> itself = XML_ParserCreate_MM (encoding , & ExpatMemoryHandler ,
12121230 namespace_separator );
1231+ self -> parent = NULL ;
12131232 if (self -> itself == NULL ) {
12141233 PyErr_SetString (PyExc_RuntimeError ,
12151234 "XML_ParserCreate failed" );
@@ -1245,6 +1264,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
12451264 for (int i = 0 ; handler_info [i ].name != NULL ; i ++ ) {
12461265 Py_VISIT (op -> handlers [i ]);
12471266 }
1267+ Py_VISIT (op -> parent );
12481268 Py_VISIT (Py_TYPE (op ));
12491269 return 0 ;
12501270}
@@ -1254,6 +1274,10 @@ xmlparse_clear(xmlparseobject *op)
12541274{
12551275 clear_handlers (op , 0 );
12561276 Py_CLEAR (op -> intern );
1277+ // NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
1278+ // XML_ParserFree(op->itself), or a subparser could lose its parent
1279+ // XML_Parser while still making use of it internally.
1280+ // https://github.com/python/cpython/issues/139400
12571281 return 0 ;
12581282}
12591283
@@ -1265,6 +1289,7 @@ xmlparse_dealloc(xmlparseobject *self)
12651289 if (self -> itself != NULL )
12661290 XML_ParserFree (self -> itself );
12671291 self -> itself = NULL ;
1292+ Py_CLEAR (self -> parent );
12681293
12691294 if (self -> handlers != NULL ) {
12701295 PyMem_Free (self -> handlers );
0 commit comments