Skip to content
Closed
33 changes: 33 additions & 0 deletions Doc/library/re.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,27 @@ when there is no match, you can test whether there was a match with a simple
if match:
process(match)

Match objects are proper :class:`~collections.abc.Sequence` types. You can access
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not true with this PR, Sequence has a number of other requirements (e.g. an index and count method).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch, thanks! I added index and count by adapting the implementation of tuple.index and tuple.count. I also updated the unit test to cover all Sequence mixin methods.

match groups via subscripting ``match[...]`` and use familiar
:class:`~collections.abc.Sequence` idioms to iterate over and extract match groups::

>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
>>> m[1]
'Isaac'
>>> list(m)
['Isaac Newton', 'Isaac', 'Newton']
>>> _, first_name, last_name = m
>>> last_name
'Newton'

You can also destructure match objects with Python's ``match`` statement::

>>> match re.match(r"(\d+)-(\d+)-(\d+)", "2000-10-16"):
... case [_, year, month, day]:
... year
...
'2000'

.. class:: Match

Match object returned by successful ``match``\ es and ``search``\ es.
Expand Down Expand Up @@ -1474,6 +1495,18 @@ when there is no match, you can test whether there was a match with a simple
.. versionadded:: 3.6


.. method:: Match.__len__()

Return the number of groups accessible through the subscript syntax provided by
:meth:`~Match.__getitem__`. This includes group ``0`` representing the entire match::

>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
>>> len(m)
3

.. versionadded:: 3.14


.. method:: Match.groups(default=None)

Return a tuple containing all the subgroups of the match, from 1 up to however
Expand Down
3 changes: 3 additions & 0 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
import enum
from . import _compiler, _parser
import functools
import _collections_abc
import _sre


Expand Down Expand Up @@ -315,6 +316,8 @@ def escape(pattern):
# Concrete runtime types of compiled pattern and match objects, obtained by
# compiling an empty pattern and matching it against the empty string.
Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))

# Register Match as a virtual subclass of Sequence so that
# isinstance(m, collections.abc.Sequence) is true for match objects.
# NOTE(review): register() does not supply Sequence's mixin methods
# (index(), count(), ...); the type has to implement them itself.
_collections_abc.Sequence.register(Match)

# --------------------------------------------------------------------
# internals

Expand Down
37 changes: 35 additions & 2 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,8 +599,41 @@ def test_match_getitem(self):
with self.assertRaises(TypeError):
m[0] = 1

# No len().
self.assertRaises(TypeError, len, m)
def test_match_sequence(self):
from collections.abc import Sequence

m = re.match(r"(a)(b)(c)", "abc")
self.assertIsInstance(m, Sequence)
self.assertEqual(len(m), 4)

it = iter(m)
self.assertEqual(next(it), "abc")
self.assertEqual(next(it), "a")
self.assertEqual(next(it), "b")
self.assertEqual(next(it), "c")

self.assertEqual(tuple(m), ("abc", "a", "b", "c"))
self.assertEqual(list(m), ["abc", "a", "b", "c"])

abc, a, b, c = m
self.assertEqual(abc, "abc")
self.assertEqual(a, "a")
self.assertEqual(b, "b")
self.assertEqual(c, "c")

match m:
case [_, "a", "b", "c"]:
pass
case _:
self.fail()

match re.match(r"(\d+)-(\d+)-(\d+)", "2025-05-07"):
case [_, year, month, day]:
self.assertEqual(year, "2025")
self.assertEqual(month, "05")
self.assertEqual(day, "07")
case _:
self.fail()

def test_re_fullmatch(self):
# Issue 16203: Proposal: add re.fullmatch() method.
Expand Down
39 changes: 31 additions & 8 deletions Modules/_sre/sre.c
Original file line number Diff line number Diff line change
Expand Up @@ -2431,8 +2431,31 @@ match_group(PyObject *op, PyObject* args)
return result;
}

static Py_ssize_t
match_length(PyObject *op)
{
    /* sq_length slot: the number of groups reachable by subscripting,
       counting group 0 (the whole match). */
    return _MatchObject_CAST(op)->groups;
}

static PyObject*
match_getitem(PyObject *op, PyObject* name)
match_item(PyObject *op, Py_ssize_t index)
{
    /* sq_item slot: return the group with the given number. */
    MatchObject *match = _MatchObject_CAST(op);

    if (0 <= index && index < match->groups) {
        /* Valid group number: delegate, passing Py_None as the default. */
        return match_getslice_by_index(match, index, Py_None);
    }

    /* Bad group number: raise IndexError unless an error is already set. */
    if (!PyErr_Occurred()) {
        PyErr_SetString(PyExc_IndexError, "no such group");
    }
    return NULL;
}

static PyObject*
match_subscript(PyObject *op, PyObject* name)
{
MatchObject *self = _MatchObject_CAST(op);
return match_getslice(self, name, Py_None);
Expand Down Expand Up @@ -3268,12 +3291,12 @@ static PyType_Slot match_slots[] = {
{Py_tp_traverse, match_traverse},
{Py_tp_clear, match_clear},

/* As mapping.
*
* Match objects do not support length or assignment, but do support
* __getitem__.
*/
{Py_mp_subscript, match_getitem},
// Sequence protocol
{Py_sq_length, match_length},
{Py_sq_item, match_item},

// Support group names provided as subscripts
{Py_mp_subscript, match_subscript},

{0, NULL},
};
Expand All @@ -3282,7 +3305,7 @@ static PyType_Spec match_spec = {
.name = "re.Match",
.basicsize = sizeof(MatchObject),
.itemsize = sizeof(Py_ssize_t),
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_SEQUENCE | Py_TPFLAGS_IMMUTABLETYPE |
Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
.slots = match_slots,
};
Expand Down
Loading