From 1270135384fab4d15a0b4332666c27c0f32da871 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 28 Aug 2025 21:55:33 +0200 Subject: [PATCH 1/3] Fix GH-19612: Mitigate libxml2 tree dictionary bug This code is very similar to code on PHP 8.4 and higher, but the mitigation is extended to entity references and to attribute children. --- ext/dom/document.c | 55 ++++++++++++++++++++++++++++++++++++-- ext/dom/tests/gh19612.phpt | 30 +++++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 ext/dom/tests/gh19612.phpt diff --git a/ext/dom/document.c b/ext/dom/document.c index e622a09309b6e..7be7467c30f0b 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1076,10 +1076,56 @@ static void php_dom_transfer_document_ref(xmlNodePtr node, php_libxml_ref_obj *n } } +/* Workaround for bug that was fixed in https://github.com/GNOME/libxml2/commit/4bc3ebf3eaba352fbbce2ef70ad00a3c7752478a */ +#if LIBXML_VERSION < 21000 +static xmlChar *libxml_copy_dicted_string(xmlDictPtr src_dict, xmlDictPtr dst_dict, xmlChar *str) +{ + if (str == NULL) { + return NULL; + } + if (xmlDictOwns(src_dict, str) == 1) { + if (dst_dict == NULL) { + return xmlStrdup(str); + } + return BAD_CAST xmlDictLookup(dst_dict, str, -1); + } + return str; +} + +static void libxml_fixup_name_and_content(xmlDocPtr src_doc, xmlDocPtr dst_doc, xmlNodePtr node) +{ + if (node->type == XML_ENTITY_REF_NODE) { + node->children = NULL; /* Break link with original document. 
*/ + } + if (src_doc != NULL && src_doc->dict != NULL) { + ZEND_ASSERT(dst_doc != src_doc); + node->name = libxml_copy_dicted_string(src_doc->dict, dst_doc->dict, BAD_CAST node->name); + node->content = libxml_copy_dicted_string(src_doc->dict, NULL, node->content); + } +} + +static void libxml_fixup_name_and_content_element(xmlDocPtr src_doc, xmlDocPtr dst_doc, xmlNodePtr node) +{ + libxml_fixup_name_and_content(src_doc, dst_doc, node); + for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) { + libxml_fixup_name_and_content(src_doc, dst_doc, (xmlNodePtr) attr); + for (xmlNodePtr attr_child = attr->children; attr_child != NULL; attr_child = attr_child->next) { + libxml_fixup_name_and_content(src_doc, dst_doc, attr_child); + } + } + + for (xmlNodePtr child = node->children; child != NULL; child = child->next) { + libxml_fixup_name_and_content_element(src_doc, dst_doc, child); + } +} +#endif + bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, xmlDocPtr new_document) { - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); - if (nodep->doc != new_document) { + xmlDocPtr old_doc = nodep->doc; + + php_libxml_invalidate_node_list_cache_from_doc(old_doc); + if (old_doc != new_document) { php_libxml_invalidate_node_list_cache(dom_object_new_document->document); /* Note for ATTRIBUTE_NODE: specified is always true in ext/dom, @@ -1089,6 +1135,11 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x return false; } +#if LIBXML_VERSION < 21000 + /* Must be first before transferring the ref to ensure the old document dictionary stays alive. 
*/ + libxml_fixup_name_and_content_element(old_doc, new_document, nodep); +#endif + php_dom_transfer_document_ref(nodep, dom_object_new_document->document); } else { xmlUnlinkNode(nodep); diff --git a/ext/dom/tests/gh19612.phpt b/ext/dom/tests/gh19612.phpt new file mode 100644 index 0000000000000..38554f3c83605 --- /dev/null +++ b/ext/dom/tests/gh19612.phpt @@ -0,0 +1,30 @@ +--TEST-- +GH-19612 (Mitigate libxml2 tree dictionary bug) +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< +]> + +XML); +$html = new DOMDocument; +$html->loadHTML('

foo

', LIBXML_NOERROR); +$p = $html->documentElement->firstChild->firstChild; +$p->appendChild($html->adoptNode($xml->documentElement->firstElementChild->cloneNode(true))); + +echo $html->saveXML(); +echo $xml->saveXML(); +?> +--EXPECT-- + + +

foo

+ + +]> + From 4879004ddb96a46f61c7b7a6dcbfbdcc7f3df7d2 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 29 Aug 2025 23:53:34 +0200 Subject: [PATCH 2/3] Work around a second libxml bug --- ext/dom/document.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/ext/dom/document.c b/ext/dom/document.c index 7be7467c30f0b..b1d734dd9580e 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1076,8 +1076,9 @@ static void php_dom_transfer_document_ref(xmlNodePtr node, php_libxml_ref_obj *n } } -/* Workaround for bug that was fixed in https://github.com/GNOME/libxml2/commit/4bc3ebf3eaba352fbbce2ef70ad00a3c7752478a */ -#if LIBXML_VERSION < 21000 +/* Workaround for bug that was fixed in https://github.com/GNOME/libxml2/commit/4bc3ebf3eaba352fbbce2ef70ad00a3c7752478a + * and https://github.com/GNOME/libxml2/commit/bc7ab5a2e61e4b36accf6803c5b0e245c11154b1 */ +#if LIBXML_VERSION < 21300 static xmlChar *libxml_copy_dicted_string(xmlDictPtr src_dict, xmlDictPtr dst_dict, xmlChar *str) { if (str == NULL) { @@ -1104,18 +1105,23 @@ static void libxml_fixup_name_and_content(xmlDocPtr src_doc, xmlDocPtr dst_doc, } } -static void libxml_fixup_name_and_content_element(xmlDocPtr src_doc, xmlDocPtr dst_doc, xmlNodePtr node) +static void libxml_fixup_name_and_content_outer(xmlDocPtr src_doc, xmlDocPtr dst_doc, xmlNodePtr node) { libxml_fixup_name_and_content(src_doc, dst_doc, node); - for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) { - libxml_fixup_name_and_content(src_doc, dst_doc, (xmlNodePtr) attr); - for (xmlNodePtr attr_child = attr->children; attr_child != NULL; attr_child = attr_child->next) { - libxml_fixup_name_and_content(src_doc, dst_doc, attr_child); + + if (node->type == XML_ELEMENT_NODE) { + for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) { + libxml_fixup_name_and_content(src_doc, dst_doc, (xmlNodePtr) attr); + for 
(xmlNodePtr attr_child = attr->children; attr_child != NULL; attr_child = attr_child->next) { + libxml_fixup_name_and_content(src_doc, dst_doc, attr_child); + } } } - for (xmlNodePtr child = node->children; child != NULL; child = child->next) { - libxml_fixup_name_and_content_element(src_doc, dst_doc, child); + if (node->type == XML_ELEMENT_NODE || node->type == XML_ATTRIBUTE_NODE) { + for (xmlNodePtr child = node->children; child != NULL; child = child->next) { + libxml_fixup_name_and_content_outer(src_doc, dst_doc, child); + } } } #endif @@ -1135,9 +1141,11 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x return false; } -#if LIBXML_VERSION < 21000 +#if LIBXML_VERSION < 21300 /* Must be first before transferring the ref to ensure the old document dictionary stays alive. */ - libxml_fixup_name_and_content_element(old_doc, new_document, nodep); + if (LIBXML_VERSION < 21000 || nodep->type == XML_ATTRIBUTE_NODE) { + libxml_fixup_name_and_content_outer(old_doc, new_document, nodep); + } #endif php_dom_transfer_document_ref(nodep, dom_object_new_document->document); From f4bfddb588014cf56fe5bfd9ece38c430db66f7f Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 29 Aug 2025 23:58:04 +0200 Subject: [PATCH 3/3] Simplify condition for more edge cases --- ext/dom/document.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ext/dom/document.c b/ext/dom/document.c index b1d734dd9580e..e48cafbabe9cd 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1143,9 +1143,7 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x #if LIBXML_VERSION < 21300 /* Must be first before transferring the ref to ensure the old document dictionary stays alive. 
*/ - if (LIBXML_VERSION < 21000 || nodep->type == XML_ATTRIBUTE_NODE) { - libxml_fixup_name_and_content_outer(old_doc, new_document, nodep); - } + libxml_fixup_name_and_content_outer(old_doc, new_document, nodep); #endif php_dom_transfer_document_ref(nodep, dom_object_new_document->document);