Skip to content

Commit c7cbb61

Browse files
gh-139489: Add xml.sax.utils.is_valid_name()
It allows checking whether a string can be used as an element or attribute name in XML.
1 parent 5cea843 commit c7cbb61

File tree

5 files changed

+72
-4
lines changed

5 files changed

+72
-4
lines changed

Doc/library/xml.sax.utils.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,20 @@ or as base classes.
5959
using the reference concrete syntax.
6060

6161

62+
.. function:: is_valid_name(name)
63+
64+
Return ``True`` if the string is a valid element or attribute name,
65+
``False`` otherwise.
66+
67+
Almost all characters are permitted in names, except control characters and
68+
those which either are or reasonably could be used as delimiters.
69+
Characters like ":", "-", ".", "_", and "·" are permitted, but "<", "/",
70+
"!", "?", and "=" are forbidden.
71+
The name cannot start with a digit or with a character such as "-", ".", or "·".
72+
73+
.. versionadded:: next
74+
75+
6276
.. class:: XMLGenerator(out=None, encoding='iso-8859-1', short_empty_elements=False)
6377

6478
This class implements the :class:`~xml.sax.handler.ContentHandler` interface

Doc/whatsnew/3.15.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,14 @@ xml.parsers.expat
580580
.. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack
581581

582582

583+
xml.sax.saxutils
584+
----------------
585+
586+
* Add the :func:`~xml.sax.saxutils.is_valid_name` function, which allows checking
587+
whether a string can be used as an element or attribute name in XML.
588+
(Contributed by Serhiy Storchaka in :gh:`139489`.)
589+
590+
583591
zlib
584592
----
585593

Lib/test/test_sax.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99
except SAXReaderNotAvailable:
1010
# don't try to test this module if we cannot create a parser
1111
raise unittest.SkipTest("no XML parsers available")
12-
from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
13-
XMLFilterBase, prepare_input_source
12+
from xml.sax.saxutils import (XMLGenerator, escape, unescape, quoteattr,
13+
is_valid_name,
14+
XMLFilterBase, prepare_input_source)
1415
from xml.sax.expatreader import create_parser
1516
from xml.sax.handler import (feature_namespaces, feature_external_ges,
1617
LexicalHandler)
@@ -343,6 +344,23 @@ def test_single_double_quoteattr(self):
343344
self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"),
344345
"\"Includes 'single' and &quot;double&quot; quotes\"")
345346

347+
def test_is_valid_name(self):
348+
self.assertFalse(is_valid_name(''))
349+
self.assertTrue(is_valid_name('name'))
350+
self.assertTrue(is_valid_name('NAME'))
351+
self.assertTrue(is_valid_name('name0:-._·'))
352+
self.assertTrue(is_valid_name('_'))
353+
self.assertTrue(is_valid_name(':'))
354+
self.assertTrue(is_valid_name('Ñàḿĕ'))
355+
self.assertTrue(is_valid_name('\U000EFFFF'))
356+
self.assertFalse(is_valid_name('0'))
357+
self.assertFalse(is_valid_name('-'))
358+
self.assertFalse(is_valid_name('.'))
359+
self.assertFalse(is_valid_name('·'))
360+
self.assertFalse(is_valid_name('na me'))
361+
for c in '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000':
362+
self.assertFalse(is_valid_name('name' + c))
363+
346364
# ===== make_parser
347365
def test_make_parser(self):
348366
# Creating a parser should succeed - it should fall back

Lib/xml/sax/saxutils.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
convenience of application and driver writers.
44
"""
55

6-
import os, urllib.parse, urllib.request
7-
import io
86
import codecs
7+
import io
8+
import os
9+
import re
10+
import urllib.parse
11+
import urllib.request
912
from . import handler
1013
from . import xmlreader
1114

@@ -67,6 +70,29 @@ def quoteattr(data, entities={}):
6770
data = '"%s"' % data
6871
return data
6972

73+
def is_valid_name(name):
74+
"""Test whether a string is a valid element or attribute name."""
75+
# https://www.w3.org/TR/xml/#NT-Name
76+
return re.fullmatch(
77+
# NameStartChar
78+
'['
79+
':A-Z_a-z'
80+
'\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF'
81+
'\u200C\u200D'
82+
'\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
83+
'\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
84+
']'
85+
# NameChar
86+
'['
87+
r'\-.0-9:A-Z_a-z'
88+
'\xB7'
89+
'\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF'
90+
'\u200C\u200D\u203F\u2040'
91+
'\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
92+
'\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
93+
']*',
94+
name) is not None
95+
7096

7197
def _gettextwriter(out, encoding):
7298
if out is None:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add the :func:`~xml.sax.saxutils.is_valid_name` function, which allows checking
2+
whether a string can be used as an element or attribute name in XML.

0 commit comments

Comments
 (0)