Skip to content

Commit f26475e

Browse files
committed
implemented simpler patterns in Group.matching and Axis.matching
1 parent 997bd60 commit f26475e

File tree

3 files changed

+130
-34
lines changed

3 files changed

+130
-34
lines changed

doc/source/changes/version_0_30.rst.inc

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,41 @@ New features
2323
Miscellaneous improvements
2424
--------------------------
2525

26-
* improved something.
26+
* implemented a simpler pattern language in :py:obj:`Axis.matching()` and :py:obj:`Group.matching()`. In addition to
27+
regular expressions (which now require using the ``regex`` argument), the two methods support the following simpler
28+
patterns:
29+
30+
* `?` matches any single character
31+
* `*` matches any number of characters
32+
* [seq] matches any character in seq
33+
* [!seq] matches any character not in seq
34+
35+
For example, assuming the following axis:
36+
37+
>>> people = Axis(['Bruce Wayne', 'Bruce Willis', 'Waldo', 'Arthur Dent', 'Harvey Dent'], 'people')
38+
39+
All labels starting with "A" and ending with "t" are given by:
40+
41+
>>> people.matching(pattern='A*t')
42+
people['Arthur Dent']
43+
44+
All labels containing "W" and ending with "s":
45+
46+
>>> people.matching(pattern='*W*s')
47+
people['Bruce Willis']
48+
49+
All labels with exactly 5 characters:
50+
51+
>>> people.matching(pattern='?????')
52+
people['Waldo']
53+
54+
All labels starting with either "A" or "B":
55+
56+
>>> people.matching(pattern='[AB]*')
57+
people['Bruce Wayne', 'Bruce Willis', 'Arthur Dent']
2758

2859

2960
Fixes
3061
-----
3162

32-
* fixed something (closes :issue:`1`).
63+
* fixed something (closes :issue:`1`).

larray/core/axis.py

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# -*- coding: utf8 -*-
22
from __future__ import absolute_import, division, print_function
33

4+
import fnmatch
45
import re
56
import sys
67
import warnings
@@ -560,43 +561,77 @@ def equals(self, other):
560561
return isinstance(other, Axis) and self.name == other.name and self.iswildcard == other.iswildcard and \
561562
(len(self) == len(other) if self.iswildcard else np.array_equal(self.labels, other.labels))
562563

563-
def matching(self, pattern):
564+
def matching(self, deprecated=None, pattern=None, regex=None):
564565
"""
565-
Returns a group with all the labels matching the specified pattern (regular expression).
566+
Returns a group with all the labels matching the specified pattern or regular expression.
566567
567568
Parameters
568569
----------
569570
pattern : str or Group
570-
Regular expression (regex).
571+
Pattern to match.
572+
* `?` matches any single character
573+
* `*` matches any number of characters
574+
* [seq] matches any character in seq
575+
* [!seq] matches any character not in seq
576+
577+
To match any of the special characters above, wrap the character in brackets. For example, `[?]` matches
578+
the character `?`.
579+
regex : str or Group
580+
Regular expression pattern to match. Regular expressions are more powerful than the simple patterns
581+
supported by the `pattern` argument but are also more complex to write.
582+
See `Regular Expression <https://docs.python.org/3/library/re.html>`_ for more details about how to build
583+
a regular expression pattern.
571584
572585
Returns
573586
-------
574587
LGroup
575588
Group containing all the labels matching the pattern.
576589
577-
Notes
578-
-----
579-
See `Regular Expression <https://docs.python.org/3/library/re.html>`_
580-
for more details about how to build a pattern.
581-
582590
Examples
583591
--------
584592
>>> people = Axis(['Bruce Wayne', 'Bruce Willis', 'Waldo', 'Arthur Dent', 'Harvey Dent'], 'people')
585593
586-
All labels starting with "W" and ending with "o" are given by
587-
588-
>>> people.matching('W.*o')
594+
>>> # All labels starting with "A" and ending with "t"
595+
>>> people.matching(pattern='A*t')
596+
people['Arthur Dent']
597+
>>> # All labels containing "W" and ending with "s"
598+
>>> people.matching(pattern='*W*s')
599+
people['Bruce Willis']
600+
>>> # All labels with exactly 5 characters
601+
>>> people.matching(pattern='?????')
589602
people['Waldo']
603+
>>> # All labels starting with either "A" or "B"
604+
>>> people.matching(pattern='[AB]*')
605+
people['Bruce Wayne', 'Bruce Willis', 'Arthur Dent']
590606
591-
All labels not containing character "a"
607+
Regular expressions are more powerful but usually harder to write and less readable
592608
593-
>>> people.matching('[^a]*$')
609+
>>> # All labels starting with "W" and ending with "o"
610+
>>> people.matching(regex='A.*t')
611+
people['Arthur Dent']
612+
>>> # All labels not containing character "a"
613+
>>> people.matching(regex='^[^a]*$')
594614
people['Bruce Willis', 'Arthur Dent']
595615
"""
596-
if isinstance(pattern, Group):
597-
pattern = pattern.eval()
598-
rx = re.compile(pattern)
599-
return LGroup([v for v in self.labels if rx.match(v)], axis=self)
616+
if deprecated is not None:
617+
assert pattern is None and regex is None
618+
regex = deprecated
619+
warnings.warn("Axis.matching() first argument will change to `pattern` in a later release. "
620+
"If your pattern is a regular expression, use Axis.matching(regex='yourpattern')."
621+
"If your pattern is a 'simple pattern', use Axis.matching(pattern='yourpattern').",
622+
FutureWarning, stacklevel=2)
623+
if pattern is not None and regex is not None:
624+
raise ValueError("Cannot use both `pattern` and `regex` arguments at the same time in Axis.matching()")
625+
if pattern is None and regex is None:
626+
raise ValueError("Must provide either `pattern` or `regex` argument in Axis.matching()")
627+
if isinstance(regex, Group):
628+
regex = regex.eval()
629+
if pattern is not None:
630+
if isinstance(pattern, Group):
631+
pattern = pattern.eval()
632+
regex = fnmatch.translate(pattern)
633+
match = re.compile(regex).match
634+
return LGroup([v for v in self.labels if match(v)], axis=self)
600635

601636
matches = renamed_to(matching, 'matches')
602637

larray/core/group.py

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# -*- coding: utf8 -*-
22
from __future__ import absolute_import, division, print_function
33

4+
import fnmatch
45
import re
56
import sys
67
import warnings
@@ -1276,44 +1277,73 @@ def endingwith(self, suffix):
12761277
suffix = suffix.eval()
12771278
return LGroup([v for v in self.eval() if v.endswith(suffix)], axis=self.axis)
12781279

1279-
def matching(self, pattern):
1280+
def matching(self, deprecated=None, pattern=None, regex=None):
12801281
"""
1281-
Returns a group with all the labels matching the specified pattern (regular expression).
1282+
Returns a group with all the labels matching the specified pattern or regular expression.
12821283
12831284
Parameters
12841285
----------
12851286
pattern : str or Group
1286-
Regular expression (regex).
1287+
Pattern to match.
1288+
* `?` matches any single character
1289+
* `*` matches any number of characters
1290+
* [seq] matches any character in seq
1291+
* [!seq] matches any character not in seq
1292+
1293+
To match any of the special characters above, wrap the character in brackets. For example, `[?]` matches
1294+
the character `?`.
1295+
regex : str or Group
1296+
Regular expression pattern to match. Regular expressions are more powerful than the simple patterns
1297+
supported by the `pattern` argument but are also more complex to write.
1298+
See `Regular Expression <https://docs.python.org/3/library/re.html>`_ for more details about how to build
1299+
a regular expression pattern.
12871300
12881301
Returns
12891302
-------
12901303
LGroup
12911304
Group containing all the labels matching the pattern.
12921305
1293-
Notes
1294-
-----
1295-
See `Regular Expression <https://docs.python.org/3/library/re.html>`_
1296-
for more details about how to build a pattern.
1297-
12981306
Examples
12991307
--------
13001308
>>> from larray import Axis
13011309
>>> people = Axis(['Bruce Wayne', 'Bruce Willis', 'Arthur Dent'], 'people')
13021310
1303-
All labels containing "B" and "e" with exactly 3 characters in between are given by
1311+
Let us create a group with all names starting with B
13041312
1305-
>>> group = people.matching('B...e')
1313+
>>> group = people.startingwith('B')
13061314
>>> group
13071315
people['Bruce Wayne', 'Bruce Willis']
13081316
13091317
Within that group, all labels containing any characters then W then any characters then s are given by
1310-
>>> group.matching('.*W.*s')
1318+
1319+
>>> group.matching(pattern='*W*s')
13111320
people['Bruce Willis']
1321+
1322+
Regular expressions are more powerful but usually harder to write and less readable. For example,
1323+
here are the labels not containing the letter "i".
1324+
1325+
>>> group.matching(regex='^[^i]*$')
1326+
people['Bruce Wayne']
13121327
"""
1313-
if isinstance(pattern, Group):
1314-
pattern = pattern.eval()
1315-
rx = re.compile(pattern)
1316-
return LGroup([v for v in self.eval() if rx.match(v)], axis=self.axis)
1328+
if deprecated is not None:
1329+
assert pattern is None and regex is None
1330+
regex = deprecated
1331+
warnings.warn("Group.matching() first argument will change to `pattern` in a later release. "
1332+
"If your pattern is a regular expression, use Group.matching(regex='yourpattern')."
1333+
"If your pattern is a 'simple pattern', use Group.matching(pattern='yourpattern').",
1334+
FutureWarning, stacklevel=2)
1335+
if pattern is not None and regex is not None:
1336+
raise ValueError("Cannot use both `pattern` and `regex` arguments at the same time in Group.matching()")
1337+
if pattern is None and regex is None:
1338+
raise ValueError("Must provide either `pattern` or `regex` argument in Group.matching()")
1339+
if isinstance(regex, Group):
1340+
regex = regex.eval()
1341+
if pattern is not None:
1342+
if isinstance(pattern, Group):
1343+
pattern = pattern.eval()
1344+
regex = fnmatch.translate(pattern)
1345+
match = re.compile(regex).match
1346+
return LGroup([v for v in self.eval() if match(v)], axis=self.axis)
13171347

13181348
def containing(self, substring):
13191349
"""

0 commit comments

Comments
 (0)