@@ -69,6 +69,15 @@ typedef struct {
6969 PyObject_HEAD
7070
7171 XML_Parser itself ;
72+ /*
73+ * Strong reference to a parent `xmlparseobject` if this parser
74+ * is a child parser. Set to NULL if this parser is a root parser.
75+ * This is needed to keep the parent parser alive as long as it has
76+ * at least one child parser.
77+ *
78+ * See https://github.com/python/cpython/issues/139400 for details.
79+ */
80+ PyObject * parent ;
7281 int ordered_attributes ; /* Return attributes as a list. */
7382 int specified_attributes ; /* Report only specified attributes. */
7483 int in_callback ; /* Is a callback active? */
@@ -990,6 +999,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
990999 return NULL ;
9911000 }
9921001
1002+ // The new subparser will make use of the parent XML_Parser inside of Expat.
1003+ // So we need to take subparsers into account with the reference counting
1004+ // of their parent parser.
1005+ Py_INCREF (self );
1006+
9931007 new_parser -> buffer_size = self -> buffer_size ;
9941008 new_parser -> buffer_used = 0 ;
9951009 new_parser -> buffer = NULL ;
@@ -999,18 +1013,21 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
9991013 new_parser -> ns_prefixes = self -> ns_prefixes ;
10001014 new_parser -> itself = XML_ExternalEntityParserCreate (self -> itself , context ,
10011015 encoding );
1016+ new_parser -> parent = (PyObject * )self ;
10021017 new_parser -> handlers = 0 ;
10031018 new_parser -> intern = Py_XNewRef (self -> intern );
10041019
10051020 if (self -> buffer != NULL ) {
10061021 new_parser -> buffer = PyMem_Malloc (new_parser -> buffer_size );
10071022 if (new_parser -> buffer == NULL ) {
10081023 Py_DECREF (new_parser );
1024+ Py_DECREF (self );
10091025 return PyErr_NoMemory ();
10101026 }
10111027 }
10121028 if (!new_parser -> itself ) {
10131029 Py_DECREF (new_parser );
1030+ Py_DECREF (self );
10141031 return PyErr_NoMemory ();
10151032 }
10161033
@@ -1023,6 +1040,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10231040 new_parser -> handlers = PyMem_New (PyObject * , i );
10241041 if (!new_parser -> handlers ) {
10251042 Py_DECREF (new_parser );
1043+ Py_DECREF (self );
10261044 return PyErr_NoMemory ();
10271045 }
10281046 clear_handlers (new_parser , 1 );
@@ -1212,6 +1230,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
12121230 /* namespace_separator is either NULL or contains one char + \0 */
12131231 self -> itself = XML_ParserCreate_MM (encoding , & ExpatMemoryHandler ,
12141232 namespace_separator );
1233+ self -> parent = NULL ;
12151234 if (self -> itself == NULL ) {
12161235 PyErr_SetString (PyExc_RuntimeError ,
12171236 "XML_ParserCreate failed" );
@@ -1247,6 +1266,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
12471266 for (int i = 0 ; handler_info [i ].name != NULL ; i ++ ) {
12481267 Py_VISIT (op -> handlers [i ]);
12491268 }
1269+ Py_VISIT (op -> parent );
12501270 Py_VISIT (Py_TYPE (op ));
12511271 return 0 ;
12521272}
@@ -1256,6 +1276,10 @@ xmlparse_clear(xmlparseobject *op)
12561276{
12571277 clear_handlers (op , 0 );
12581278 Py_CLEAR (op -> intern );
1279+ // NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
1280+ // XML_ParserFree(op->itself), or a subparser could lose its parent
1281+ // XML_Parser while still making use of it internally.
1282+ // https://github.com/python/cpython/issues/139400
12591283 return 0 ;
12601284}
12611285
@@ -1267,6 +1291,7 @@ xmlparse_dealloc(xmlparseobject *self)
12671291 if (self -> itself != NULL )
12681292 XML_ParserFree (self -> itself );
12691293 self -> itself = NULL ;
1294+ Py_CLEAR (self -> parent );
12701295
12711296 if (self -> handlers != NULL ) {
12721297 PyMem_Free (self -> handlers );
0 commit comments