diff --git a/Doc/library/re.rst b/Doc/library/re.rst index a91bac53fb4e75..4aa43ff4e85305 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -831,7 +831,7 @@ Flags value:: def myfunc(text, flag=re.NOFLAG): - return re.match(text, flag) + return re.search(text, flag) .. versionadded:: 3.11 @@ -887,8 +887,8 @@ Functions Compile a regular expression pattern into a :ref:`regular expression object <re-objects>`, which can be used for matching using its - :func:`~Pattern.match`, :func:`~Pattern.search` and other methods, described - below. + :func:`~Pattern.prefixmatch` (:func:`~Pattern.match`), + :func:`~Pattern.search`, and other methods, described below. The expression's behaviour can be modified by specifying a *flags* value. Values can be any of the `flags`_ variables, combined using bitwise OR @@ -897,11 +897,11 @@ Functions The sequence :: prog = re.compile(pattern) - result = prog.match(string) + result = prog.search(string) is equivalent to :: - result = re.match(pattern, string) + result = re.search(pattern, string) but using :func:`re.compile` and saving the resulting regular expression object for reuse is more efficient when the expression will be used several @@ -928,14 +928,15 @@ Functions .. function:: match(pattern, string, flags=0) +.. function:: prefixmatch(pattern, string, flags=0) If zero or more characters at the beginning of *string* match the regular expression *pattern*, return a corresponding :class:`~re.Match`. Return ``None`` if the string does not match the pattern; note that this is different from a zero-length match. - Note that even in :const:`MULTILINE` mode, :func:`re.match` will only match - at the beginning of the string and not at the beginning of each line. + Note that even in :const:`MULTILINE` mode, this will only match at the + beginning of the string and not at the beginning of each line. If you want to locate a match anywhere in *string*, use :func:`search` instead (see also :ref:`search-vs-match`). 
@@ -944,6 +945,18 @@ Functions Values can be any of the `flags`_ variables, combined using bitwise OR (the ``|`` operator). + This function now has two names and has long been known as + :func:`~re.match`. Use that name when you need to retain compatibility with + older Python versions. + + .. versionchanged:: next + An alternate :func:`~re.prefixmatch` name with this API was added as a + more descriptive explicit name for the behavior of :func:`~re.match`. Use + it to more clearly express intent. The norm in other languages and + regular expression implementations is to use the term *match* to refer to + the behavior of what Python has always called :func:`~re.search`. See + :ref:`prefixmatch-vs-match`. + .. function:: fullmatch(pattern, string, flags=0) @@ -1264,23 +1277,42 @@ Regular Expression Objects .. method:: Pattern.match(string[, pos[, endpos]]) +.. method:: Pattern.prefixmatch(string[, pos[, endpos]]) If zero or more characters at the *beginning* of *string* match this regular expression, return a corresponding :class:`~re.Match`. Return ``None`` if the string does not match the pattern; note that this is different from a zero-length match. + Note that even in :const:`MULTILINE` mode, this will only match at the + beginning of the string and not at the beginning of each line. + The optional *pos* and *endpos* parameters have the same meaning as for the :meth:`~Pattern.search` method. :: >>> pattern = re.compile("o") - >>> pattern.match("dog") # No match as "o" is not at the start of "dog". - >>> pattern.match("dog", 1) # Match as "o" is the 2nd character of "dog". + >>> pattern.prefixmatch("dog") # No match as "o" is not at the start of "dog". + >>> pattern.prefixmatch("dog", 1) # Match as "o" is the 2nd character of "dog". + + >>> pattern.match("dog") # Same as above. + >>> pattern.match("dog", 1) # Same as above. If you want to locate a match anywhere in *string*, use :meth:`~Pattern.search` instead (see also :ref:`search-vs-match`). 
+ This method now has two names and has long been known as + :meth:`~Pattern.match`. Use that name when you need to retain compatibility + with older Python versions. + + .. versionchanged:: next + An alternate :meth:`~Pattern.prefixmatch` name with this API was added as + a more descriptive explicit name for the behavior of + :meth:`~Pattern.match`. Use it to more clearly express intent. The norm + in other languages and regular expression implementations is to use the + term *match* to refer to the behavior of what Python has always called + :meth:`~Pattern.search`. See :ref:`prefixmatch-vs-match`. + .. method:: Pattern.fullmatch(string[, pos[, endpos]]) @@ -1368,8 +1400,7 @@ Since :meth:`~Pattern.match` and :meth:`~Pattern.search` return ``None`` when there is no match, you can test whether there was a match with a simple ``if`` statement:: - match = re.search(pattern, string) - if match: + if match := re.search(pattern, string): process(match) .. class:: Match @@ -1407,7 +1438,7 @@ when there is no match, you can test whether there was a match with a simple If a group is contained in a part of the pattern that matched multiple times, the last match is returned. :: - >>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") + >>> m = re.search(r"(\w+) (\w+)", "Isaac Newton, physicist") >>> m.group(0) # The entire match 'Isaac Newton' >>> m.group(1) # The first parenthesized subgroup. @@ -1424,7 +1455,7 @@ when there is no match, you can test whether there was a match with a simple A moderately complicated example:: - >>> m = re.match(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") + >>> m = re.search(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") >>> m.group('first_name') 'Malcolm' >>> m.group('last_name') @@ -1439,8 +1470,8 @@ when there is no match, you can test whether there was a match with a simple If a group matches multiple times, only the last match is accessible:: - >>> m = re.match(r"(..)+", "a1b2c3") # Matches 3 times. 
- >>> m.group(1) # Returns only the last match. + >>> m = re.search(r"(..)+", "a1b2c3") # Matches 3 times. + >>> m.group(1) # Returns only the last match. 'c3' @@ -1449,7 +1480,7 @@ when there is no match, you can test whether there was a match with a simple This is identical to ``m.group(g)``. This allows easier access to an individual group from a match:: - >>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") + >>> m = re.search(r"(\w+) (\w+)", "Isaac Newton, physicist") >>> m[0] # The entire match 'Isaac Newton' >>> m[1] # The first parenthesized subgroup. @@ -1459,7 +1490,7 @@ when there is no match, you can test whether there was a match with a simple Named groups are supported as well:: - >>> m = re.match(r"(?P\w+) (?P\w+)", "Isaac Newton") + >>> m = re.search(r"(?P\w+) (?P\w+)", "Isaac Newton") >>> m['first_name'] 'Isaac' >>> m['last_name'] @@ -1476,7 +1507,7 @@ when there is no match, you can test whether there was a match with a simple For example:: - >>> m = re.match(r"(\d+)\.(\d+)", "24.1632") + >>> m = re.search(r"(\d+)\.(\d+)", "24.1632") >>> m.groups() ('24', '1632') @@ -1484,7 +1515,7 @@ when there is no match, you can test whether there was a match with a simple might participate in the match. These groups will default to ``None`` unless the *default* argument is given:: - >>> m = re.match(r"(\d+)\.?(\d+)?", "24") + >>> m = re.search(r"(\d+)\.?(\d+)?", "24") >>> m.groups() # Second group defaults to None. ('24', None) >>> m.groups('0') # Now, the second group defaults to '0'. @@ -1497,7 +1528,7 @@ when there is no match, you can test whether there was a match with a simple the subgroup name. The *default* argument is used for groups that did not participate in the match; it defaults to ``None``. 
For example:: - >>> m = re.match(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") + >>> m = re.search(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") >>> m.groupdict() {'first_name': 'Malcolm', 'last_name': 'Reynolds'} @@ -1603,38 +1634,38 @@ representing the card with that value. To see if a given string is a valid hand, one could do the following:: >>> valid = re.compile(r"^[a2-9tjqk]{5}$") - >>> displaymatch(valid.match("akt5q")) # Valid. + >>> displaymatch(valid.search("akt5q")) # Valid. "" - >>> displaymatch(valid.match("akt5e")) # Invalid. - >>> displaymatch(valid.match("akt")) # Invalid. - >>> displaymatch(valid.match("727ak")) # Valid. + >>> displaymatch(valid.search("akt5e")) # Invalid. + >>> displaymatch(valid.search("akt")) # Invalid. + >>> displaymatch(valid.search("727ak")) # Valid. "" That last hand, ``"727ak"``, contained a pair, or two of the same valued cards. To match this with a regular expression, one could use backreferences as such:: - >>> pair = re.compile(r".*(.).*\1") - >>> displaymatch(pair.match("717ak")) # Pair of 7s. + >>> pair = re.compile(r"^.*(.).*\1") + >>> displaymatch(pair.search("717ak")) # Pair of 7s. "" - >>> displaymatch(pair.match("718ak")) # No pairs. - >>> displaymatch(pair.match("354aa")) # Pair of aces. + >>> displaymatch(pair.search("718ak")) # No pairs. + >>> displaymatch(pair.search("354aa")) # Pair of aces. 
"" To find out what card the pair consists of, one could use the :meth:`~Match.group` method of the match object in the following manner:: - >>> pair = re.compile(r".*(.).*\1") - >>> pair.match("717ak").group(1) + >>> pair = re.compile(r"^.*(.).*\1") + >>> pair.search("717ak").group(1) '7' - # Error because re.match() returns None, which doesn't have a group() method: - >>> pair.match("718ak").group(1) + # Error because re.search() returns None, which doesn't have a group() method: + >>> pair.search("718ak").group(1) Traceback (most recent call last): File "", line 1, in - re.match(r".*(.).*\1", "718ak").group(1) + re.search(r".*(.).*\1", "718ak").group(1) AttributeError: 'NoneType' object has no attribute 'group' - >>> pair.match("354aa").group(1) + >>> pair.search("354aa").group(1) 'a' @@ -1693,16 +1724,17 @@ search() vs. match() Python offers different primitive operations based on regular expressions: -+ :func:`re.match` checks for a match only at the beginning of the string ++ :func:`re.prefixmatch`, also known under the less explicit name + :func:`re.match`, checks for a match only at the beginning of the string + :func:`re.search` checks for a match anywhere in the string (this is what Perl does by default) + :func:`re.fullmatch` checks for entire string to be a match - For example:: - >>> re.match("c", "abcdef") # No match - >>> re.search("c", "abcdef") # Match + >>> re.match("c", "abcdef") # No match + >>> re.prefixmatch("c", "abcdef") # No match + >>> re.search("c", "abcdef") # Match >>> re.fullmatch("p.*n", "python") # Match @@ -1711,19 +1743,47 @@ For example:: Regular expressions beginning with ``'^'`` can be used with :func:`search` to restrict the match at the beginning of the string:: - >>> re.match("c", "abcdef") # No match - >>> re.search("^c", "abcdef") # No match - >>> re.search("^a", "abcdef") # Match + >>> re.match("c", "abcdef") # No match + >>> re.prefixmatch("c", "abcdef") # No match + >>> re.search("^c", "abcdef") # No match + >>> 
re.search("^a", "abcdef") # Match Note however that in :const:`MULTILINE` mode :func:`match` only matches at the beginning of the string, whereas using :func:`search` with a regular expression beginning with ``'^'`` will match at the beginning of each line. :: + >>> re.prefixmatch("X", "A\nB\nX", re.MULTILINE) # No match >>> re.match("X", "A\nB\nX", re.MULTILINE) # No match >>> re.search("^X", "A\nB\nX", re.MULTILINE) # Match +.. _prefixmatch-vs-match: + +prefixmatch() vs. match() +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Why is the :func:`~re.match` function and method name being discouraged in +favor of the longer :func:`~re.prefixmatch` spelling in very recent Python? + +Many other languages have gained regex support libraries since regular +expressions were added to Python. However, in the most popular of those, they +use the term *match* in their APIs to mean the unanchored behavior provided in +Python by :func:`~re.search`. Thus use of the plain term *match* can be +unclear to those used to other languages when reading or writing code and +not familiar with the Python API's divergence from what has otherwise become the +industry norm. + +Quoting from the Zen of Python (``python3 -m this``): *"Explicit is better than +implicit"*. Anyone reading the name :func:`~re.prefixmatch` is likely to +understand the intended semantics. When reading :func:`~re.match` there remains +a seed of doubt about the intended behavior to anyone not already familiar with +this old Python gotcha. + +We **do not** plan to deprecate and remove the older *match* name in this +decade, if ever, as it has been used in code for over 25 years. + +.. versionadded:: next Making a Phonebook ^^^^^^^^^^^^^^^^^^ @@ -1843,9 +1903,9 @@ every backslash (``'\'``) in a regular expression would have to be prefixed with another one to escape it. 
For example, the two following lines of code are functionally identical:: - >>> re.match(r"\W(.)\1\W", " ff ") + >>> re.search(r"\W(.)\1\W", " ff ") - >>> re.match("\\W(.)\\1\\W", " ff ") + >>> re.search("\\W(.)\\1\\W", " ff ") When one wants to match a literal backslash, it must be escaped in the regular @@ -1853,9 +1913,9 @@ expression. With raw string notation, this means ``r"\\"``. Without raw string notation, one must use ``"\\\\"``, making the following lines of code functionally identical:: - >>> re.match(r"\\", r"\\") + >>> re.search(r"\\", r"\\") - >>> re.match("\\\\", r"\\") + >>> re.search("\\\\", r"\\") diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index aaa4702d53df93..0eae1e40f54133 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1056,6 +1056,19 @@ pydoc (Contributed by Jelle Zijlstra in :gh:`101552`.) +re +-- + +* :func:`re.prefixmatch` and a corresponding :meth:`~re.Pattern.prefixmatch` + have been added as alternate more explicit names for the existing + :func:`re.match` and :meth:`~re.Pattern.match` APIs. These are intended + to be used to alleviate confusion around what *match* means by following the + Zen of Python's *"Explicit is better than implicit"* mantra. Most other + language regular expression libraries use an API named *match* to mean what + Python has always called *search*. + (Contributed by Gregory P. Smith in :gh:`86519`.) + + ssl --- diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index 7e8abbf6ffe155..fb176ee23207f8 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -85,17 +85,18 @@ \\ Matches a literal backslash. This module exports the following functions: - match Match a regular expression pattern to the beginning of a string. - fullmatch Match a regular expression pattern to all of a string. - search Search a string for the presence of a pattern. - sub Substitute occurrences of a pattern found in a string. - subn Same as sub, but also return the number of substitutions made. 
- split Split a string by the occurrences of a pattern. - findall Find all occurrences of a pattern in a string. - finditer Return an iterator yielding a Match object for each match. - compile Compile a pattern into a Pattern object. - purge Clear the regular expression cache. - escape Backslash all non-alphanumerics in a string. + prefixmatch Match a regular expression pattern to the beginning of a str. + match The original name of prefixmatch prior to 3.14. + fullmatch Match a regular expression pattern to all of a string. + search Search a string for the presence of a pattern. + sub Substitute occurrences of a pattern found in a string. + subn Same as sub, but also return the number of substitutions made. + split Split a string by the occurrences of a pattern. + findall Find all occurrences of a pattern in a string. + finditer Return an iterator yielding a Match object for each match. + compile Compile a pattern into a Pattern object. + purge Clear the regular expression cache. + escape Backslash all non-alphanumerics in a string. Each function other than purge and escape can take an optional 'flags' argument consisting of one or more of the following module constants, joined by "|". 
@@ -130,14 +131,14 @@ # public symbols __all__ = [ - "match", "fullmatch", "search", "sub", "subn", "split", + "prefixmatch", "match", "fullmatch", "search", "sub", "subn", "split", "findall", "finditer", "compile", "purge", "escape", "error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE", "NOFLAG", "RegexFlag", "PatternError" ] -__version__ = "2.2.1" +__version__ = "3.14.0" @enum.global_enum @enum._simple_enum(enum.IntFlag, boundary=enum.KEEP) @@ -161,10 +162,13 @@ class RegexFlag: # -------------------------------------------------------------------- # public interface -def match(pattern, string, flags=0): +def prefixmatch(pattern, string, flags=0): """Try to apply the pattern at the start of the string, returning a Match object, or None if no match was found.""" - return _compile(pattern, flags).match(string) + return _compile(pattern, flags).prefixmatch(string) + +# The original, less explicit name for prefixmatch, kept as an alias. 
+match = prefixmatch def fullmatch(pattern, string, flags=0): """Try to apply the pattern to all of the string, returning @@ -313,7 +317,7 @@ def escape(pattern): return pattern.translate(_special_chars_map).encode('latin1') Pattern = type(_compiler.compile('', 0)) -Match = type(_compiler.compile('', 0).match('')) +Match = type(_compiler.compile('', 0).prefixmatch('')) # -------------------------------------------------------------------- # internals @@ -409,10 +413,10 @@ def __init__(self, lexicon, flags=0): def scan(self, string): result = [] append = result.append - match = self.scanner.scanner(string).match + _match = self.scanner.scanner(string).prefixmatch i = 0 while True: - m = match() + m = _match() if not m: break j = m.end() diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index daae990458d708..bad56a2dd181a5 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -5812,7 +5812,10 @@ def test_pwd_module_has_signatures(self): def test_re_module_has_signatures(self): import re - methods_no_signature = {'Match': {'group'}} + methods_no_signature = { + 'Match': {'group'}, + 'Pattern': {'match'}, # It is now an alias for prefixmatch + } self._test_module_has_signatures(re, methods_no_signature=methods_no_signature, good_exceptions={'error', 'PatternError'}) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index f65b4076aee2c6..f15e8e1bcefe9d 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -90,10 +90,13 @@ def test_search_star_plus(self): self.assertEqual(re.search('x+', 'axx').span(), (1, 3)) self.assertIsNone(re.search('x', 'aaa')) self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0)) + self.assertEqual(re.prefixmatch('a*', 'xxx').span(0), (0, 0)) self.assertEqual(re.match('a*', 'xxx').span(), (0, 0)) self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3)) + self.assertEqual(re.prefixmatch('x*', 'xxxa').span(0), (0, 3)) self.assertEqual(re.match('x*', 
'xxxa').span(), (0, 3)) self.assertIsNone(re.match('a+', 'xxx')) + self.assertIsNone(re.prefixmatch('a+', 'xxx')) def test_branching(self): """Test Branching @@ -180,6 +183,7 @@ def test_bug_449000(self): def test_bug_1661(self): # Verify that flags do not get silently ignored with compiled patterns pattern = re.compile('.') + self.assertRaises(ValueError, re.prefixmatch, pattern, 'A', re.I) self.assertRaises(ValueError, re.match, pattern, 'A', re.I) self.assertRaises(ValueError, re.search, pattern, 'A', re.I) self.assertRaises(ValueError, re.findall, pattern, 'A', re.I) @@ -517,6 +521,8 @@ def test_re_match(self): self.assertEqual(re.match(b'(a)', string).group(0), b'a') self.assertEqual(re.match(b'(a)', string).group(1), b'a') self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a')) + self.assertEqual(re.prefixmatch(b'(a)', string).group(1, 1), + (b'a', b'a')) for a in ("\xe0", "\u0430", "\U0001d49c"): self.assertEqual(re.match(a, a).groups(), ()) self.assertEqual(re.match('(%s)' % a, a).groups(), (a,)) @@ -561,46 +567,48 @@ def __index__(self): def test_match_getitem(self): pat = re.compile('(?:(?Pa)|(?Pb))(?Pc)?') - m = pat.match('a') - self.assertEqual(m['a1'], 'a') - self.assertEqual(m['b2'], None) - self.assertEqual(m['c3'], None) - self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None') - self.assertEqual(m[0], 'a') - self.assertEqual(m[1], 'a') - self.assertEqual(m[2], None) - self.assertEqual(m[3], None) - with self.assertRaisesRegex(IndexError, 'no such group'): - m['X'] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[-1] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[4] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[0, 1] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[(0,)] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[(0, 1)] - with self.assertRaisesRegex(IndexError, 'no such group'): - 'a1={a2}'.format_map(m) - - m = 
pat.match('ac') - self.assertEqual(m['a1'], 'a') - self.assertEqual(m['b2'], None) - self.assertEqual(m['c3'], 'c') - self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c') - self.assertEqual(m[0], 'ac') - self.assertEqual(m[1], 'a') - self.assertEqual(m[2], None) - self.assertEqual(m[3], 'c') - - # Cannot assign. - with self.assertRaises(TypeError): - m[0] = 1 - - # No len(). - self.assertRaises(TypeError, len, m) + for match_fn in pat.match, pat.prefixmatch: + with self.subTest(match_fn.__name__): + m = match_fn('a') + self.assertEqual(m['a1'], 'a') + self.assertEqual(m['b2'], None) + self.assertEqual(m['c3'], None) + self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None') + self.assertEqual(m[0], 'a') + self.assertEqual(m[1], 'a') + self.assertEqual(m[2], None) + self.assertEqual(m[3], None) + with self.assertRaisesRegex(IndexError, 'no such group'): + m['X'] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[-1] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[4] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[0, 1] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[(0,)] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[(0, 1)] + with self.assertRaisesRegex(IndexError, 'no such group'): + 'a1={a2}'.format_map(m) + + m = match_fn('ac') + self.assertEqual(m['a1'], 'a') + self.assertEqual(m['b2'], None) + self.assertEqual(m['c3'], 'c') + self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c') + self.assertEqual(m[0], 'ac') + self.assertEqual(m[1], 'a') + self.assertEqual(m[2], None) + self.assertEqual(m[3], 'c') + + # Cannot assign. + with self.assertRaises(TypeError): + m[0] = 1 + + # No len(). + self.assertRaises(TypeError, len, m) def test_re_fullmatch(self): # Issue 16203: Proposal: add re.fullmatch() method. 
diff --git a/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst b/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst new file mode 100644 index 00000000000000..12784beef22f77 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst @@ -0,0 +1,11 @@ +The :mod:`re` module gains a new :func:`re.prefixmatch` function as an +explicit spelling of what has to date always been known as :func:`re.match`. +:class:`re.Pattern` similarly gains a :meth:`re.Pattern.prefixmatch` method. + +Why? Explicit is better than implicit. Other widely used languages all use +the term "match" to mean what Python uses the term "search" for. The +unadorned "match" name in Python has been a frequent cause of confusion and +coding bugs due to the inconsistency with the rest of the software industry. + +No plans to remove and deprecate the existing ``match`` names exist. If that +were to happen it would be at minimum 7 years in the future. diff --git a/Modules/_sre/clinic/sre.c.h b/Modules/_sre/clinic/sre.c.h index d2f25a71495cda..b49bf4e058b69b 100644 --- a/Modules/_sre/clinic/sre.c.h +++ b/Modules/_sre/clinic/sre.c.h @@ -164,22 +164,22 @@ _sre_unicode_tolower(PyObject *module, PyObject *arg) return return_value; } -PyDoc_STRVAR(_sre_SRE_Pattern_match__doc__, -"match($self, /, string, pos=0, endpos=sys.maxsize)\n" +PyDoc_STRVAR(_sre_SRE_Pattern_prefixmatch__doc__, +"prefixmatch($self, /, string, pos=0, endpos=sys.maxsize)\n" "--\n" "\n" "Matches zero or more characters at the beginning of the string."); -#define _SRE_SRE_PATTERN_MATCH_METHODDEF \ - {"match", _PyCFunction_CAST(_sre_SRE_Pattern_match), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_match__doc__}, +#define _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF \ + {"prefixmatch", _PyCFunction_CAST(_sre_SRE_Pattern_prefixmatch), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_prefixmatch__doc__}, static PyObject * -_sre_SRE_Pattern_match_impl(PatternObject *self, 
PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos); +_sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos); static PyObject * -_sre_SRE_Pattern_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_sre_SRE_Pattern_prefixmatch(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -205,7 +205,7 @@ _sre_SRE_Pattern_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "match", + .fname = "prefixmatch", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -254,7 +254,7 @@ _sre_SRE_Pattern_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, endpos = ival; } skip_optional_pos: - return_value = _sre_SRE_Pattern_match_impl((PatternObject *)self, cls, string, pos, endpos); + return_value = _sre_SRE_Pattern_prefixmatch_impl((PatternObject *)self, cls, string, pos, endpos); exit: return return_value; @@ -1523,25 +1523,25 @@ _sre_SRE_Match___deepcopy__(PyObject *self, PyObject *memo) return return_value; } -PyDoc_STRVAR(_sre_SRE_Scanner_match__doc__, -"match($self, /)\n" +PyDoc_STRVAR(_sre_SRE_Scanner_prefixmatch__doc__, +"prefixmatch($self, /)\n" "--\n" "\n"); -#define _SRE_SRE_SCANNER_MATCH_METHODDEF \ - {"match", _PyCFunction_CAST(_sre_SRE_Scanner_match), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_match__doc__}, +#define _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF \ + {"prefixmatch", _PyCFunction_CAST(_sre_SRE_Scanner_prefixmatch), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_prefixmatch__doc__}, static PyObject * -_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls); 
+_sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls); static PyObject * -_sre_SRE_Scanner_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_sre_SRE_Scanner_prefixmatch(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { - PyErr_SetString(PyExc_TypeError, "match() takes no arguments"); + PyErr_SetString(PyExc_TypeError, "prefixmatch() takes no arguments"); return NULL; } - return _sre_SRE_Scanner_match_impl((ScannerObject *)self, cls); + return _sre_SRE_Scanner_prefixmatch_impl((ScannerObject *)self, cls); } PyDoc_STRVAR(_sre_SRE_Scanner_search__doc__, @@ -1568,4 +1568,4 @@ _sre_SRE_Scanner_search(PyObject *self, PyTypeObject *cls, PyObject *const *args #ifndef _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF #define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF #endif /* !defined(_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF) */ -/*[clinic end generated code: output=bbf42e1de3bdd3ae input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0c867efb64e020aa input=a9049054013a1b77]*/ diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 602d0ab8588f62..9280834a84d639 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -768,7 +768,7 @@ sre_search(SRE_STATE* state, SRE_CODE* pattern) } /*[clinic input] -_sre.SRE_Pattern.match +_sre.SRE_Pattern.prefixmatch cls: defining_class / @@ -780,10 +780,10 @@ Matches zero or more characters at the beginning of the string. 
[clinic start generated code]*/ static PyObject * -_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos) -/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/ +_sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos) +/*[clinic end generated code: output=a0e079fb4f875240 input=e2a7e68ea47d048c]*/ { _sremodulestate *module_state = get_sre_module_state_by_class(cls); SRE_STATE state; @@ -811,6 +811,7 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, return match; } + /*[clinic input] _sre.SRE_Pattern.fullmatch @@ -2667,7 +2668,7 @@ _sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo) } PyDoc_STRVAR(match_doc, -"The result of re.match() and re.search().\n\ +"The result of re.search(), re.prefixmatch(), and re.fullmatch().\n\ Match objects always have a boolean value of True."); PyDoc_STRVAR(match_group_doc, @@ -2854,7 +2855,7 @@ scanner_end(ScannerObject* self) } /*[clinic input] -_sre.SRE_Scanner.match +_sre.SRE_Scanner.prefixmatch cls: defining_class / @@ -2862,8 +2863,8 @@ _sre.SRE_Scanner.match [clinic start generated code]*/ static PyObject * -_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ +_sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=02b3b9d2954a2157 input=3049b20466c56a8e]*/ { _sremodulestate *module_state = get_sre_module_state_by_class(cls); SRE_STATE* state = &self->state; @@ -3161,7 +3162,8 @@ pattern_richcompare(PyObject *lefto, PyObject *righto, int op) #include "clinic/sre.c.h" static PyMethodDef pattern_methods[] = { - _SRE_SRE_PATTERN_MATCH_METHODDEF + _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF + {"match", NULL}, /* filled in by sre_exec() */ _SRE_SRE_PATTERN_FULLMATCH_METHODDEF 
_SRE_SRE_PATTERN_SEARCH_METHODDEF _SRE_SRE_PATTERN_SUB_METHODDEF @@ -3288,7 +3290,8 @@ static PyType_Spec match_spec = { }; static PyMethodDef scanner_methods[] = { - _SRE_SRE_SCANNER_MATCH_METHODDEF + _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF + {"match", NULL}, /* filled in by sre_exec() */ _SRE_SRE_SCANNER_SEARCH_METHODDEF {NULL, NULL} }; @@ -3392,11 +3395,40 @@ do { \ } \ } while (0) + +static void +copy_prefixmatch_method_def_to_match(PyMethodDef *method_defs) +{ + /* We could implement logic to scan the null filled sentry + * terminated list for the two method names. But we're a + * bunch of static structs. We just guarantee their position + * and flag deviation from this via debug build assertions. + */ + assert(method_defs); + PyMethodDef *prefixmatch_md = &method_defs[0]; + assert(prefixmatch_md->ml_name != NULL); + assert(strcmp(prefixmatch_md->ml_name, "prefixmatch") == 0); + + PyMethodDef *match_md = &method_defs[1]; + assert(match_md->ml_name != NULL); + assert(strcmp(match_md->ml_name, "match") == 0); + /* If the public stable C API struct ever changed (!) and + * somehow wound up with unexpected layout and alignment + * constraints, fix the memcpy below. */ + assert(offsetof(PyMethodDef, ml_meth) == sizeof(char *)); + memcpy(&match_md->ml_meth, &prefixmatch_md->ml_meth, + sizeof(PyMethodDef) - offsetof(PyMethodDef, ml_meth)); +} + + static int sre_exec(PyObject *m) { _sremodulestate *state; + copy_prefixmatch_method_def_to_match(pattern_methods); + copy_prefixmatch_method_def_to_match(scanner_methods); + /* Create heap types */ state = get_sre_module_state(m); CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);