Skip to content

Commit e666916

Browse files
Update boolean dtype representation and improve regex handling in StringMethods
1 parent 9b917a2 commit e666916

File tree

2 files changed

+33
-23
lines changed

2 files changed

+33
-23
lines changed

pandas/core/strings/accessor.py

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1477,14 +1477,14 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=lib.no_default):
14771477
0 False
14781478
1 True
14791479
2 True
1480-
dtype: bool
1480+
dtype: boolean
14811481
14821482
Ensure consistent behavior with alternation patterns:
14831483
>>> ser = pd.Series(["asdf", "as"], dtype="string[pyarrow]")
14841484
>>> ser.str.fullmatch(r"(as)|(as)")
14851485
0 False
14861486
1 True
1487-
dtype: bool
1487+
dtype: boolean
14881488
"""
14891489
is_pyarrow = False
14901490
arr = self._data.array
@@ -1494,35 +1494,40 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=lib.no_default):
14941494
is_pyarrow = "Arrow" in arr_type
14951495
if not is_pyarrow and hasattr(arr, "dtype"):
14961496
dtype_str = str(arr.dtype)
1497-
is_pyarrow = "pyarrow" in dtype_str.lower() or "arrow" in dtype_str.lower()
1497+
is_pyarrow = (
1498+
"pyarrow" in dtype_str.lower() or "arrow" in dtype_str.lower()
1499+
)
14981500
if is_pyarrow and "|" in pat:
1501+
14991502
def _is_fully_wrapped(pattern):
1500-
if not (pattern.startswith('(') and pattern.endswith(')')):
1501-
return False
1503+
if not (pattern.startswith("(") and pattern.endswith(")")):
1504+
return False
15021505
inner = pattern[1:-1]
15031506
level = 0
15041507
escape = False
15051508
in_char_class = False
15061509
for char in inner:
15071510
if escape:
15081511
escape = False
1509-
continue
1510-
if char == '\\':
1512+
continue
1513+
if char == "\\":
15111514
escape = True
1512-
elif not in_char_class and char == '[':
1515+
elif not in_char_class and char == "[":
15131516
in_char_class = True
1514-
elif in_char_class and char == ']':
1517+
elif in_char_class and char == "]":
15151518
in_char_class = False
15161519
elif not in_char_class:
1517-
if char == '(':
1520+
if char == "(":
15181521
level += 1
1519-
elif char == ')':
1522+
elif char == ")":
15201523
if level == 0:
15211524
return False
15221525
level -= 1
15231526
return level == 0
1524-
if not (pat.startswith('(') and pat.endswith(')') and
1525-
_is_fully_wrapped(pat)):
1527+
1528+
if not (
1529+
pat.startswith("(") and pat.endswith(")") and _is_fully_wrapped(pat)
1530+
):
15261531
pat = f"({pat})"
15271532
result = self._data.array._str_fullmatch(pat, case=case, flags=flags, na=na)
15281533
return self._wrap_result(result, fill_value=na, returns_string=False)
Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,17 @@
11
import pytest
2-
from pandas import (
3-
Series,
4-
)
2+
3+
from pandas import Series
4+
5+
56
@pytest.mark.parametrize("dtype", ["string[pyarrow]", str])
67
def test_string_array(dtype):
7-
test_series = Series(['asdf', 'as'], dtype=dtype)
8-
regex = r'((as)|(as))'
9-
regex2 = r'(as)|(as)'
8+
test_series = Series(["asdf", "as"], dtype=dtype)
9+
regex = r"((as)|(as))"
10+
regex2 = r"(as)|(as)"
1011
assert list(test_series.str.fullmatch(regex)) == [False, True]
1112
assert list(test_series.str.fullmatch(regex2)) == [False, True]
13+
14+
1215
@pytest.mark.parametrize(
1316
"data, pattern, expected",
1417
[
@@ -18,14 +21,16 @@ def test_string_array(dtype):
1821
def test_string_match(data, pattern, expected):
1922
ser = Series(data)
2023
assert list(ser.str.fullmatch(pattern)) == expected
24+
25+
2126
@pytest.mark.parametrize("dtype", ["string[pyarrow]", str])
2227
@pytest.mark.parametrize(
2328
"pattern, expected",
2429
[
25-
(r'(foo)|((as)(df)?)', [True, True, True]),
26-
('foo|as', [False, True, True]),
30+
(r"(foo)|((as)(df)?)", [True, True, True]),
31+
("foo|as", [False, True, True]),
2732
],
2833
)
2934
def test_string_alternation_patterns(dtype, pattern, expected):
30-
ser = Series(['asdf', 'foo', 'as'], dtype=dtype)
31-
assert list(ser.str.fullmatch(pattern)) == expected
35+
ser = Series(["asdf", "foo", "as"], dtype=dtype)
36+
assert list(ser.str.fullmatch(pattern)) == expected

0 commit comments

Comments
 (0)