Skip to content
Closed
46 changes: 46 additions & 0 deletions Doc/library/re.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,27 @@ when there is no match, you can test whether there was a match with a simple
if match:
process(match)

Match objects are proper :class:`~collections.abc.Sequence` types. You can access
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not true with this PR, Sequence has a number of other requirements (e.g. an index and count method).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch, thanks! I added index and count by adapting the implementation of tuple.index and tuple.count. I also updated the unit test to cover all Sequence mixin methods.

match groups via subscripting ``match[...]`` and use familiar
:class:`~collections.abc.Sequence` idioms to iterate over and extract match groups::

>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
>>> m[1]
'Isaac'
>>> list(m)
['Isaac Newton', 'Isaac', 'Newton']
>>> _, first_name, last_name = m
>>> last_name
'Newton'

You can also destructure match objects with Python's ``match`` statement::

>>> match re.match(r"(\d+)-(\d+)-(\d+)", "2000-10-16"):
... case [_, year, month, day]:
... year
...
'2000'

.. class:: Match

Match object returned by successful ``match``\ es and ``search``\ es.
Expand Down Expand Up @@ -1474,6 +1495,18 @@ when there is no match, you can test whether there was a match with a simple
.. versionadded:: 3.6


.. method:: Match.__len__()

Return the number of groups accessible through the subscript syntax provided by
:meth:`~Match.__getitem__`. This includes group ``0`` representing the entire match::

>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
>>> len(m)
3

.. versionadded:: 3.14


.. method:: Match.groups(default=None)

Return a tuple containing all the subgroups of the match, from 1 up to however
Expand Down Expand Up @@ -1538,6 +1571,19 @@ when there is no match, you can test whether there was a match with a simple
that if *group* did not contribute to the match, this is ``(-1, -1)``.
*group* defaults to zero, the entire match.

.. method:: Match.index(value, start=0, stop=sys.maxsize, /)

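Return the index of the first occurrence of the value among the matched groups.
The optional arguments *start* and *stop* limit the search to that range of
group indices, interpreted as in slice notation.

Raises :exc:`ValueError` if the value is not present.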

.. versionadded:: 3.14

.. method:: Match.count(value, /)

Return the number of occurrences of the value among the matched groups.

.. versionadded:: 3.14

.. attribute:: Match.pos

Expand Down
3 changes: 3 additions & 0 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
import enum
from . import _compiler, _parser
import functools
import _collections_abc
import _sre


Expand Down Expand Up @@ -315,6 +316,8 @@ def escape(pattern):
Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))

# Register Match as a virtual subclass of the Sequence ABC so that
# isinstance(m, collections.abc.Sequence) is true for match objects.
_collections_abc.Sequence.register(Match)

# --------------------------------------------------------------------
# internals

Expand Down
62 changes: 60 additions & 2 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,8 +599,66 @@ def test_match_getitem(self):
with self.assertRaises(TypeError):
m[0] = 1

# No len().
self.assertRaises(TypeError, len, m)
def test_match_sequence(self):
from collections.abc import Sequence

m = re.match(r"(a)(b)(c)", "abc")
self.assertIsInstance(m, Sequence)
self.assertEqual(len(m), 4)

it = iter(m)
self.assertEqual(next(it), "abc")
self.assertEqual(next(it), "a")
self.assertEqual(next(it), "b")
self.assertEqual(next(it), "c")

self.assertEqual(tuple(m), ("abc", "a", "b", "c"))
self.assertEqual(list(m), ["abc", "a", "b", "c"])

abc, a, b, c = m
self.assertEqual(abc, "abc")
self.assertEqual(a, "a")
self.assertEqual(b, "b")
self.assertEqual(c, "c")

self.assertIn("abc", m)
self.assertIn("a", m)
self.assertIn("b", m)
self.assertIn("c", m)
self.assertNotIn("123", m)

self.assertEqual(list(reversed(m)), ["c", "b", "a", "abc"])

self.assertEqual(m.index("abc"), 0)
self.assertEqual(m.index("a"), 1)
self.assertEqual(m.index("b"), 2)
self.assertEqual(m.index("c"), 3)
self.assertRaises(ValueError, m.index, "123")

self.assertEqual(m.count("abc"), 1)
self.assertEqual(m.count("a"), 1)
self.assertEqual(m.count("b"), 1)
self.assertEqual(m.count("c"), 1)
self.assertEqual(m.count("123"), 0)

match m:
case [_, "a", "b", "c"]:
pass
case _:
self.fail()

match re.match(r"(\d+)-(\d+)-(\d+)", "2025-05-07"):
case [_, year, month, day]:
self.assertEqual(year, "2025")
self.assertEqual(month, "05")
self.assertEqual(day, "07")
case _:
self.fail()

for s, k, v in re.finditer(r"(\w+):(\w+)", "abc:123"):
self.assertEqual(s, "abc:123")
self.assertEqual(k, "abc")
self.assertEqual(v, "123")

def test_re_fullmatch(self):
# Issue 16203: Proposal: add re.fullmatch() method.
Expand Down
70 changes: 69 additions & 1 deletion Modules/_sre/clinic/sre.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

117 changes: 109 additions & 8 deletions Modules/_sre/sre.c
Original file line number Diff line number Diff line change
Expand Up @@ -2431,8 +2431,31 @@ match_group(PyObject *op, PyObject* args)
return result;
}

/* sq_length slot: report how many groups can be indexed on the match.
 * The stored count is returned unchanged. */
static Py_ssize_t
match_length(PyObject *op)
{
    MatchObject *match = _MatchObject_CAST(op);

    return match->groups;
}

/* sq_item slot: return the group at position `index`.
 *
 * Valid positions are 0 <= index < groups; anything else fails with
 * IndexError("no such group") and a NULL return. */
static PyObject*
match_item(PyObject *op, Py_ssize_t index)
{
    MatchObject *match = _MatchObject_CAST(op);

    if (0 <= index && index < match->groups) {
        return match_getslice_by_index(match, index, Py_None);
    }

    /* Bad group number: raise IndexError unless an exception is
     * already pending. */
    if (!PyErr_Occurred()) {
        PyErr_SetString(PyExc_IndexError, "no such group");
    }
    return NULL;
}

static PyObject*
match_getitem(PyObject *op, PyObject* name)
match_subscript(PyObject *op, PyObject* name)
{
MatchObject *self = _MatchObject_CAST(op);
return match_getslice(self, name, Py_None);
Expand Down Expand Up @@ -2614,6 +2637,82 @@ _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
return _pair(self->mark[index*2], self->mark[index*2+1]);
}

/*[clinic input]
_sre.SRE_Match.index

value: object
start: slice_index(accept={int}) = 0
stop: slice_index(accept={int}, c_default="PY_SSIZE_T_MAX") = sys.maxsize
/

Return the index of the first occurrence of the value among the matched groups.

Raises ValueError if the value is not present.
[clinic start generated code]*/

static PyObject *
_sre_SRE_Match_index_impl(MatchObject *self, PyObject *value,
                          Py_ssize_t start, Py_ssize_t stop)
/*[clinic end generated code: output=846597f6f96f829c input=7f41b5a99e0ad88e]*/
{
    const Py_ssize_t ngroups = self->groups;

    /* Normalize the bounds: negative values are offset by the group
     * count, a still-negative start is clamped to 0 and an oversized
     * stop is clamped to the group count. */
    if (start < 0) {
        start += ngroups;
        if (start < 0) {
            start = 0;
        }
    }
    if (stop < 0) {
        stop += ngroups;
    }
    else if (stop > ngroups) {
        stop = ngroups;
    }

    /* Linear scan; the first group comparing equal to `value` wins. */
    for (Py_ssize_t pos = start; pos < stop; pos++) {
        PyObject *item = match_getslice_by_index(self, pos, Py_None);
        if (item == NULL) {
            return NULL;
        }
        int status = PyObject_RichCompareBool(item, value, Py_EQ);
        Py_DECREF(item);
        if (status < 0) {
            /* The comparison itself raised. */
            return NULL;
        }
        if (status > 0) {
            return PyLong_FromSsize_t(pos);
        }
    }

    PyErr_SetString(PyExc_ValueError, "match.index(x): x not in match");
    return NULL;
}

/*[clinic input]
_sre.SRE_Match.count

value: object
/

Return the number of occurrences of the value among the matched groups.
[clinic start generated code]*/

static PyObject *
_sre_SRE_Match_count_impl(MatchObject *self, PyObject *value)
/*[clinic end generated code: output=c0b81bdce5872620 input=b1f3372cfb4b8c74]*/
{
    Py_ssize_t hits = 0;

    /* Visit every group position and tally those comparing equal to
     * `value`. */
    for (Py_ssize_t pos = 0; pos < self->groups; pos++) {
        PyObject *item = match_getslice_by_index(self, pos, Py_None);
        if (item == NULL) {
            return NULL;
        }
        int status = PyObject_RichCompareBool(item, value, Py_EQ);
        Py_DECREF(item);
        if (status < 0) {
            /* The comparison itself raised. */
            return NULL;
        }
        if (status > 0) {
            hits++;
        }
    }

    return PyLong_FromSsize_t(hits);
}

static PyObject*
match_regs(MatchObject* self)
{
Expand Down Expand Up @@ -3224,6 +3323,8 @@ static PyMethodDef match_methods[] = {
_SRE_SRE_MATCH_START_METHODDEF
_SRE_SRE_MATCH_END_METHODDEF
_SRE_SRE_MATCH_SPAN_METHODDEF
_SRE_SRE_MATCH_INDEX_METHODDEF
_SRE_SRE_MATCH_COUNT_METHODDEF
_SRE_SRE_MATCH_GROUPS_METHODDEF
_SRE_SRE_MATCH_GROUPDICT_METHODDEF
_SRE_SRE_MATCH_EXPAND_METHODDEF
Expand Down Expand Up @@ -3268,12 +3369,12 @@ static PyType_Slot match_slots[] = {
{Py_tp_traverse, match_traverse},
{Py_tp_clear, match_clear},

/* As mapping.
*
* Match objects do not support length or assignment, but do support
* __getitem__.
*/
{Py_mp_subscript, match_getitem},
// Sequence protocol
{Py_sq_length, match_length},
{Py_sq_item, match_item},

// Support group names provided as subscripts
{Py_mp_subscript, match_subscript},

{0, NULL},
};
Expand All @@ -3282,7 +3383,7 @@ static PyType_Spec match_spec = {
.name = "re.Match",
.basicsize = sizeof(MatchObject),
.itemsize = sizeof(Py_ssize_t),
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_SEQUENCE | Py_TPFLAGS_IMMUTABLETYPE |
Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
.slots = match_slots,
};
Expand Down
Loading