diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -4668,6 +4668,63 @@ def cleanup():
     old_factories = ET._set_factories(ET.Comment, ET.PI)
     unittest.addModuleCleanup(ET._set_factories, *old_factories)
 
+
+class TestElementTreeGlobalNamespace(unittest.TestCase):
+    # XPath prefixes that are not in an explicit *namespaces* mapping are
+    # looked up among the prefixes registered with register_namespace().
+
+    NS_URI = "http://www.w3.org/TR/html4/"
+
+    def setUp(self):
+        # register_namespace() mutates module-global state; snapshot the
+        # registry and restore it so other tests are unaffected.
+        registry = ET._namespace_map
+        saved = dict(registry)
+
+        def restore():
+            registry.clear()
+            registry.update(saved)
+
+        self.addCleanup(restore)
+        ET.register_namespace("h", self.NS_URI)
+
+    def test_find_uses_registered_namespace(self):
+        root = ET.fromstring(
+            '<root xmlns:h="%s"><h:title>Apple</h:title></root>'
+            % self.NS_URI)
+        tree = ET.ElementTree(root)
+
+        # should work without passing namespaces explicitly
+        elem = tree.find(".//h:title")
+        self.assertIsNotNone(elem)
+        self.assertEqual(elem.tag, "{%s}title" % self.NS_URI)
+
+    def test_findall_and_findtext_with_global_ns(self):
+        root = ET.fromstring(
+            '<root xmlns:h="%s">'
+            '<h:item>Apple</h:item><h:item>Banana</h:item>'
+            '</root>' % self.NS_URI)
+
+        items = root.findall(".//h:item")
+        self.assertEqual(len(items), 2)
+
+        first_text = root.findtext(".//h:item")
+        self.assertEqual(first_text, "Apple")
+
+    def test_explicit_namespaces_take_precedence(self):
+        root = ET.fromstring(
+            '<root xmlns:o="urn:other"><o:item>Cherry</o:item></root>')
+
+        # "h" is registered globally, but the explicit mapping must win.
+        elem = root.find(".//h:item", {"h": "urn:other"})
+        self.assertIsNotNone(elem)
+        self.assertEqual(elem.text, "Cherry")
+
+    def test_unregistered_prefix_raises(self):
+        root = ET.fromstring("<root/>")
+        with self.assertRaises(SyntaxError):
+            root.find(".//unregistered:item")
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -79,11 +79,25 @@ def xpath_tokenizer(pattern, namespaces=None):
         if tag and tag[0] != "{":
             if ":" in tag:
                 prefix, uri = tag.split(":", 1)
-                try:
-                    if not namespaces:
-                        raise KeyError
+                if namespaces and prefix in namespaces:
+                    # An explicitly supplied prefix map takes precedence.
                     yield ttype, "{%s}%s" % (namespaces[prefix], uri)
-                except KeyError:
-                    raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
+                else:
+                    # Otherwise fall back to the prefixes registered through
+                    # ElementTree.register_namespace().  The import is local
+                    # because ElementTree imports this module while it is
+                    # still initialising, so a top-level import is circular.
+                    from . import ElementTree
+                    # The registry maps URI -> prefix, so search by value.
+                    registry = ElementTree._namespace_map
+                    for ns_uri, ns_prefix in registry.items():
+                        if ns_prefix == prefix:
+                            break
+                    else:
+                        raise SyntaxError(
+                            "prefix %r not found in prefix map" % prefix)
+                    # NOTE(review): callers may cache compiled paths; confirm
+                    # a cached selector cannot outlive a registry change.
+                    yield ttype, "{%s}%s" % (ns_uri, uri)
             elif default_namespace and not parsing_attribute:
                 yield ttype, "{%s}%s" % (default_namespace, tag)
diff --git a/Misc/NEWS.d/next/Library/2025-10-21-04-00-29.gh-issue-140123.Cc6xGI.rst b/Misc/NEWS.d/next/Library/2025-10-21-04-00-29.gh-issue-140123.Cc6xGI.rst
new file mode 100644
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-21-04-00-29.gh-issue-140123.Cc6xGI.rst
@@ -0,0 +1,5 @@
+The ``find()``, ``findtext()``, ``findall()`` and ``iterfind()`` methods of
+:class:`xml.etree.ElementTree.Element` and
+:class:`xml.etree.ElementTree.ElementTree` now resolve XPath prefixes
+registered with :func:`xml.etree.ElementTree.register_namespace` when the
+prefix is not present in the *namespaces* argument.