Skip to content
Closed
65 changes: 65 additions & 0 deletions Doc/library/re.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,27 @@ when there is no match, you can test whether there was a match with a simple
if match:
process(match)

Match objects are proper :class:`~collections.abc.Sequence` types. You can access
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not true with this PR, Sequence has a number of other requirements (e.g. an index and count method).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch, thanks! I added index and count by adapting the implementation of tuple.index and tuple.count. I also updated the unit test to cover all Sequence mixin methods.

match groups via subscripting ``match[...]`` and use familiar
:class:`~collections.abc.Sequence` idioms to iterate over and extract match groups::

>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
>>> m[1]
'Isaac'
>>> list(m)
['Isaac Newton', 'Isaac', 'Newton']
>>> _, first_name, last_name = m
>>> last_name
'Newton'

You can also destructure match objects with Python's ``match`` statement::

>>> match re.match(r"(\d+)-(\d+)-(\d+)", "2000-10-16"):
... case [_, year, month, day]:
... year
...
'2000'

.. class:: Match

Match object returned by successful ``match``\ es and ``search``\ es.
Expand Down Expand Up @@ -1473,6 +1494,37 @@ when there is no match, you can test whether there was a match with a simple

.. versionadded:: 3.6

.. versionchanged:: 3.14

Negative indexing is now supported. This allows accessing match groups
from the end, starting from the last group defined in the pattern::

>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
>>> m[-1] # The first parenthesized subgroup starting from the end.
'Newton'
>>> m[-2] # The second parenthesized subgroup starting from the end.
'Isaac'
>>> m[-3] # The entire match; with two groups this is the same as m[0].
'Isaac Newton'

You can also use slicing to extract multiple groups as a tuple::

>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
>>> m[1:]
('Isaac', 'Newton')


.. method:: Match.__len__()

Return the number of groups accessible through the subscript syntax provided by
:meth:`~Match.__getitem__`. This includes group ``0`` representing the entire match::

>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
>>> len(m)
3

.. versionadded:: 3.14


.. method:: Match.groups(default=None)

Expand Down Expand Up @@ -1538,6 +1590,19 @@ when there is no match, you can test whether there was a match with a simple
that if *group* did not contribute to the match, this is ``(-1, -1)``.
*group* defaults to zero, the entire match.

.. method:: Match.index(value, start=0, stop=sys.maxsize, /)

Return the index of the first occurrence of the value among the matched groups.

Raises :exc:`ValueError` if the value is not present.

.. versionadded:: 3.14

.. method:: Match.count(value, /)

Return the number of occurrences of the value among the matched groups.

.. versionadded:: 3.14

.. attribute:: Match.pos

Expand Down
3 changes: 3 additions & 0 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
import enum
from . import _compiler, _parser
import functools
import _collections_abc
import _sre


Expand Down Expand Up @@ -315,6 +316,8 @@ def escape(pattern):
# The pattern and match types are obtained by compiling an empty pattern
# and matching it against an empty string, then taking type() of the results.
Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))

# Register Match as a virtual subclass of Sequence so that
# isinstance(m, collections.abc.Sequence) is true for match objects.
# NOTE(review): this registration is presumably also what allows match
# objects to be used with sequence patterns in ``match`` statements -- confirm.
_collections_abc.Sequence.register(Match)

# --------------------------------------------------------------------
# internals

Expand Down
85 changes: 82 additions & 3 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,10 +570,14 @@ def test_match_getitem(self):
self.assertEqual(m[1], 'a')
self.assertEqual(m[2], None)
self.assertEqual(m[3], None)
self.assertEqual(m[-1], None)
self.assertEqual(m[-2], None)
self.assertEqual(m[-3], 'a')
self.assertEqual(m[-4], 'a')
with self.assertRaisesRegex(IndexError, 'no such group'):
m['X']
with self.assertRaisesRegex(IndexError, 'no such group'):
m[-1]
m[-5]
with self.assertRaisesRegex(IndexError, 'no such group'):
m[4]
with self.assertRaisesRegex(IndexError, 'no such group'):
Expand All @@ -599,8 +603,83 @@ def test_match_getitem(self):
with self.assertRaises(TypeError):
m[0] = 1

# No len().
self.assertRaises(TypeError, len, m)
def test_match_sequence(self):
# Check that a match object behaves like a read-only sequence whose
# items are group 0 (the entire match) followed by each numbered group.
from collections.abc import Sequence

# Three capturing groups plus the entire match give a length of 4.
m = re.match(r"(a)(b)(c)", "abc")
self.assertIsInstance(m, Sequence)
self.assertEqual(len(m), 4)

# Non-negative indexing: 0 is the entire match, 1..3 are the groups,
# and an index past the last group raises IndexError.
self.assertEqual(m[0], "abc")
self.assertEqual(m[1], "a")
self.assertEqual(m[2], "b")
self.assertEqual(m[3], "c")
with self.assertRaises(IndexError):
_ = m[4]

# Negative indexing counts back from the last group; -len(m) is the
# entire match and anything below that raises IndexError.
self.assertEqual(m[-1], "c")
self.assertEqual(m[-2], "b")
self.assertEqual(m[-3], "a")
self.assertEqual(m[-4], "abc")
with self.assertRaises(IndexError):
_ = m[-5]

# Slicing (including a negative step) returns a tuple of groups.
self.assertEqual(m[1:-1], ("a", "b"))
self.assertEqual(m[::-1], ("c", "b", "a", "abc"))

# Iteration yields the entire match first, then the groups in order.
it = iter(m)
self.assertEqual(next(it), "abc")
self.assertEqual(next(it), "a")
self.assertEqual(next(it), "b")
self.assertEqual(next(it), "c")

self.assertEqual(tuple(m), ("abc", "a", "b", "c"))
self.assertEqual(list(m), ["abc", "a", "b", "c"])

# Iterable unpacking into exactly len(m) names.
abc, a, b, c = m
self.assertEqual(abc, "abc")
self.assertEqual(a, "a")
self.assertEqual(b, "b")
self.assertEqual(c, "c")

# Membership tests compare against the entire match and every group.
self.assertIn("abc", m)
self.assertIn("a", m)
self.assertIn("b", m)
self.assertIn("c", m)
self.assertNotIn("123", m)

self.assertEqual(list(reversed(m)), ["c", "b", "a", "abc"])

# index() returns the position of a value and raises ValueError
# when the value is absent.
self.assertEqual(m.index("abc"), 0)
self.assertEqual(m.index("a"), 1)
self.assertEqual(m.index("b"), 2)
self.assertEqual(m.index("c"), 3)
self.assertRaises(ValueError, m.index, "123")

# count() returns the number of occurrences, 0 for an absent value.
self.assertEqual(m.count("abc"), 1)
self.assertEqual(m.count("a"), 1)
self.assertEqual(m.count("b"), 1)
self.assertEqual(m.count("c"), 1)
self.assertEqual(m.count("123"), 0)

# Sequence patterns in ``match`` statements: first with literal
# sub-patterns, then with capture names bound to the groups.
match m:
case [_, "a", "b", "c"]:
pass
case _:
self.fail()

match re.match(r"(\d+)-(\d+)-(\d+)", "2025-05-07"):
case [_, year, month, day]:
self.assertEqual(year, "2025")
self.assertEqual(month, "05")
self.assertEqual(day, "07")
case _:
self.fail()

# Each match produced by finditer() can be unpacked directly in the
# ``for`` target: entire match first, then the two groups.
for s, k, v in re.finditer(r"(\w+):(\w+)", "abc:123"):
self.assertEqual(s, "abc:123")
self.assertEqual(k, "abc")
self.assertEqual(v, "123")

def test_re_fullmatch(self):
# Issue 16203: Proposal: add re.fullmatch() method.
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ Bennett Benson
Ezra Berch
Stuart Berg
Michel Van den Bergh
Valentin Berlier
Julian Berman
Brice Berna
Olivier Bernard
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
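Make :class:`re.Match` a :class:`~collections.abc.Sequence`: match objects
now support :func:`len`, iteration, negative indexing, slicing, and the
``index()`` and ``count()`` methods.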
70 changes: 69 additions & 1 deletion Modules/_sre/clinic/sre.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading